From 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 8 Apr 2013 18:41:23 +0000 Subject: [PATCH 1/2] Vendor import of llvm trunk r178860: http://llvm.org/svn/llvm-project/llvm/trunk@178860 --- .arcconfig | 4 + CMakeLists.txt | 26 +- CODE_OWNERS.TXT | 102 +- CREDITS.TXT | 15 +- LICENSE.TXT | 4 +- Makefile | 27 +- Makefile.common | 2 +- Makefile.config.in | 30 +- Makefile.rules | 39 +- README.txt | 10 +- autoconf/AutoRegen.sh | 2 +- autoconf/config.sub | 4 +- autoconf/configure.ac | 247 +- autoconf/m4/cxx_flag_check.m4 | 2 +- autoconf/m4/func_isinf.m4 | 22 +- autoconf/m4/huge_val.m4 | 10 +- autoconf/m4/single_cxx_check.m4 | 22 +- bindings/python/llvm/common.py | 50 +- bindings/python/llvm/disassembler.py | 11 + .../python/llvm/tests/test_disassembler.py | 13 +- cmake/config-ix.cmake | 34 + cmake/modules/AddLLVM.cmake | 34 +- cmake/modules/GetSVN.cmake | 25 + cmake/modules/HandleLLVMOptions.cmake | 128 +- cmake/modules/LLVM-Config.cmake | 7 +- cmake/modules/VersionFromVCS.cmake | 82 +- configure | 957 +- docs/AliasAnalysis.rst | 39 +- docs/Atomics.rst | 2 - docs/BitCodeFormat.rst | 6 +- docs/BranchWeightMetadata.rst | 12 +- docs/Bugpoint.rst | 6 +- docs/CMake.rst | 24 +- docs/CodeGenerator.rst | 153 +- docs/CodingStandards.rst | 86 +- docs/CommandGuide/FileCheck.rst | 219 +- docs/CommandGuide/bugpoint.rst | 84 +- docs/CommandGuide/index.rst | 3 +- docs/CommandGuide/lit.rst | 428 +- docs/CommandGuide/llc.rst | 176 +- docs/CommandGuide/lli.rst | 2 +- docs/CommandGuide/llvm-bcanalyzer.rst | 221 +- docs/CommandGuide/llvm-cov.rst | 40 +- docs/CommandGuide/llvm-link.rst | 86 +- docs/CommandGuide/llvm-stress.rst | 30 +- docs/CommandGuide/llvm-symbolizer.rst | 65 + docs/CommandGuide/opt.rst | 182 +- docs/CommandGuide/tblgen.rst | 139 +- docs/CommandLine.rst | 6 +- docs/CompilerWriterInfo.rst | 35 +- docs/DebuggingJITedCode.rst | 4 - docs/DeveloperPolicy.rst | 12 +- docs/Dummy.html | 0 docs/ExceptionHandling.rst | 49 +- 
docs/ExtendingLLVM.rst | 2 - docs/FAQ.rst | 39 +- docs/GCCFEBuildInstrs.html | 279 - docs/GarbageCollection.html | 1389 - docs/GarbageCollection.rst | 1029 + docs/GetElementPtr.rst | 8 +- docs/GettingStarted.rst | 241 +- docs/GettingStartedVS.rst | 2 - docs/GoldPlugin.rst | 4 - docs/HowToAddABuilder.rst | 4 - docs/HowToBuildOnARM.rst | 14 +- docs/HowToReleaseLLVM.html | 581 - docs/HowToReleaseLLVM.rst | 422 + docs/HowToSetUpLLVMStyleRTTI.rst | 76 +- docs/HowToSubmitABug.rst | 4 - docs/HowToUseAttributes.rst | 81 + docs/HowToUseInstrMappings.rst | 8 +- docs/LLVMBuild.html | 368 - docs/LLVMBuild.rst | 325 + docs/LangRef.html | 8776 --- docs/LangRef.rst | 8605 ++ docs/Lexicon.rst | 25 +- docs/LinkTimeOptimization.rst | 9 +- docs/Makefile.sphinx | 4 - docs/MakefileGuide.rst | 29 +- docs/MarkedUpDisassembly.rst | 2 - docs/NVPTXUsage.rst | 276 + docs/Packaging.rst | 2 - docs/Passes.html | 2049 - docs/Passes.rst | 1261 + docs/Phabricator.rst | 6 + docs/ProgrammersManual.html | 4156 - docs/ProgrammersManual.rst | 3204 + docs/Projects.rst | 10 +- docs/README.txt | 42 +- docs/ReleaseNotes.html | 975 - docs/ReleaseNotes.rst | 144 + docs/SegmentedStacks.rst | 2 - docs/SourceLevelDebugging.html | 2858 - docs/SourceLevelDebugging.rst | 2281 + docs/SphinxQuickstartTemplate.rst | 51 +- docs/SystemLibrary.html | 316 - docs/SystemLibrary.rst | 247 + docs/TableGen/LangRef.rst | 383 + docs/TableGenFundamentals.rst | 37 +- docs/TestSuiteMakefileGuide.html | 351 - docs/TestSuiteMakefileGuide.rst | 276 + docs/TestingGuide.html | 916 - docs/TestingGuide.rst | 455 + docs/Vectorizers.rst | 338 + docs/WritingAnLLVMBackend.html | 2557 - docs/WritingAnLLVMBackend.rst | 1838 + docs/WritingAnLLVMPass.html | 1954 - docs/WritingAnLLVMPass.rst | 1436 + docs/YamlIO.rst | 860 + docs/conf.py | 8 +- docs/design_and_overview.rst | 36 - docs/development_process.rst | 30 - docs/doxygen.footer | 2 +- docs/gcc-loops.png | Bin 0 -> 21535 bytes docs/index.rst | 432 +- docs/linpack-pc.png | Bin 0 -> 13578 
bytes docs/mailing_lists.rst | 35 - docs/programming.rst | 57 - docs/subsystems.rst | 106 - docs/tutorial/LangImpl1.html | 348 - docs/tutorial/LangImpl1.rst | 278 + docs/tutorial/LangImpl2.html | 1231 - docs/tutorial/LangImpl2.rst | 1096 + docs/tutorial/LangImpl3.html | 1268 - docs/tutorial/LangImpl3.rst | 1160 + docs/tutorial/LangImpl4.html | 1152 - docs/tutorial/LangImpl4.rst | 1061 + docs/tutorial/LangImpl5.html | 1772 - docs/tutorial/LangImpl5.rst | 1607 + docs/tutorial/LangImpl6.html | 1829 - docs/tutorial/LangImpl6.rst | 1726 + docs/tutorial/LangImpl7.html | 2164 - docs/tutorial/LangImpl7.rst | 2003 + docs/tutorial/LangImpl8.html | 359 - docs/tutorial/LangImpl8.rst | 267 + docs/tutorial/OCamlLangImpl1.html | 365 - docs/tutorial/OCamlLangImpl1.rst | 285 + docs/tutorial/OCamlLangImpl2.html | 1043 - docs/tutorial/OCamlLangImpl2.rst | 896 + docs/tutorial/OCamlLangImpl3.html | 1093 - docs/tutorial/OCamlLangImpl3.rst | 961 + docs/tutorial/OCamlLangImpl4.html | 1026 - docs/tutorial/OCamlLangImpl4.rst | 915 + docs/tutorial/OCamlLangImpl5.html | 1560 - docs/tutorial/OCamlLangImpl5.rst | 1362 + docs/tutorial/OCamlLangImpl6.html | 1574 - docs/tutorial/OCamlLangImpl6.rst | 1441 + docs/tutorial/OCamlLangImpl7.html | 1904 - docs/tutorial/OCamlLangImpl7.rst | 1723 + docs/tutorial/OCamlLangImpl8.html | 359 - docs/tutorial/OCamlLangImpl8.rst | 267 + docs/tutorial/index.html | 48 - docs/tutorial/index.rst | 43 + docs/userguides.rst | 104 - docs/yaml2obj.rst | 2 - examples/BrainF/BrainF.cpp | 6 +- examples/BrainF/BrainF.h | 6 +- examples/BrainF/BrainFDriver.cpp | 4 +- examples/ExceptionDemo/ExceptionDemo.cpp | 41 +- examples/Fibonacci/fibonacci.cpp | 16 +- examples/HowToUseJIT/HowToUseJIT.cpp | 18 +- examples/Kaleidoscope/Chapter2/toy.cpp | 2 +- examples/Kaleidoscope/Chapter3/toy.cpp | 10 +- examples/Kaleidoscope/Chapter4/toy.cpp | 18 +- examples/Kaleidoscope/Chapter5/toy.cpp | 18 +- examples/Kaleidoscope/Chapter6/toy.cpp | 18 +- examples/Kaleidoscope/Chapter7/toy.cpp | 18 +- 
examples/ModuleMaker/ModuleMaker.cpp | 10 +- examples/ParallelJIT/ParallelJIT.cpp | 16 +- include/llvm-c/Core.h | 49 +- include/llvm-c/Disassembler.h | 19 +- include/llvm-c/EnhancedDisassembly.h | 530 - include/llvm-c/Initialization.h | 1 + include/llvm-c/LinkTimeOptimizer.h | 4 +- include/llvm-c/TargetMachine.h | 1 + .../llvm-c/Transforms/PassManagerBuilder.h | 8 +- include/llvm-c/lto.h | 11 +- include/llvm/ADT/APFloat.h | 13 +- include/llvm/ADT/APInt.h | 79 +- include/llvm/ADT/APSInt.h | 10 +- include/llvm/ADT/ArrayRef.h | 29 +- include/llvm/ADT/BitVector.h | 4 +- include/llvm/ADT/DAGDeltaAlgorithm.h | 2 +- include/llvm/ADT/DeltaAlgorithm.h | 2 +- include/llvm/ADT/DenseMap.h | 52 +- include/llvm/ADT/DenseSet.h | 4 +- include/llvm/ADT/DepthFirstIterator.h | 2 +- include/llvm/ADT/FoldingSet.h | 2 +- include/llvm/ADT/ImmutableIntervalMap.h | 4 +- include/llvm/ADT/ImmutableList.h | 6 +- include/llvm/ADT/ImmutableMap.h | 34 +- include/llvm/ADT/ImmutableSet.h | 33 +- include/llvm/ADT/IntervalMap.h | 22 +- include/llvm/ADT/IntrusiveRefCntPtr.h | 12 +- include/llvm/ADT/MapVector.h | 42 + include/llvm/ADT/None.h | 27 + include/llvm/ADT/NullablePtr.h | 4 +- include/llvm/ADT/Optional.h | 132 +- include/llvm/ADT/OwningPtr.h | 8 +- include/llvm/ADT/PointerIntPair.h | 27 +- include/llvm/ADT/PointerUnion.h | 21 +- include/llvm/ADT/PostOrderIterator.h | 2 +- include/llvm/ADT/PriorityQueue.h | 4 +- include/llvm/ADT/SCCIterator.h | 2 +- include/llvm/ADT/STLExtras.h | 6 +- include/llvm/ADT/SmallBitVector.h | 16 +- include/llvm/ADT/SmallPtrSet.h | 31 +- include/llvm/ADT/SmallSet.h | 3 +- include/llvm/ADT/SmallString.h | 2 +- include/llvm/ADT/SmallVector.h | 28 +- include/llvm/ADT/SparseMultiSet.h | 526 + include/llvm/ADT/SparseSet.h | 2 +- include/llvm/ADT/Statistic.h | 44 +- include/llvm/ADT/StringExtras.h | 13 +- include/llvm/ADT/StringMap.h | 14 +- include/llvm/ADT/StringRef.h | 9 +- include/llvm/ADT/StringSet.h | 26 +- include/llvm/ADT/TinyPtrVector.h | 2 +- 
include/llvm/ADT/Triple.h | 19 +- include/llvm/ADT/ValueMap.h | 3 +- include/llvm/ADT/VariadicFunction.h | 6 +- include/llvm/ADT/ilist.h | 31 +- include/llvm/ADT/ilist_node.h | 4 +- include/llvm/AddressingMode.h | 41 - include/llvm/Analysis/AliasAnalysis.h | 33 +- include/llvm/Analysis/AliasSetTracker.h | 3 +- include/llvm/Analysis/BlockFrequencyImpl.h | 6 +- include/llvm/Analysis/BranchProbabilityInfo.h | 4 +- include/llvm/Analysis/CFGPrinter.h | 6 +- include/llvm/Analysis/CallGraph.h | 6 +- .../llvm/{ => Analysis}/CallGraphSCCPass.h | 9 +- include/llvm/Analysis/CallPrinter.h | 27 + include/llvm/Analysis/CaptureTracking.h | 9 +- include/llvm/Analysis/CodeMetrics.h | 112 +- include/llvm/Analysis/DOTGraphTraitsPass.h | 119 +- include/llvm/Analysis/DependenceAnalysis.h | 42 +- include/llvm/Analysis/DominatorInternals.h | 2 +- include/llvm/Analysis/Dominators.h | 19 +- include/llvm/Analysis/IVUsers.h | 1 - include/llvm/Analysis/InlineCost.h | 212 +- include/llvm/Analysis/InstructionSimplify.h | 68 +- include/llvm/Analysis/Interval.h | 4 +- include/llvm/Analysis/IntervalIterator.h | 8 +- include/llvm/Analysis/IntervalPartition.h | 4 +- include/llvm/Analysis/LibCallAliasAnalysis.h | 4 +- include/llvm/Analysis/Loads.h | 2 +- include/llvm/Analysis/LoopInfo.h | 36 +- include/llvm/Analysis/LoopInfoImpl.h | 7 +- include/llvm/Analysis/LoopIterator.h | 5 +- include/llvm/Analysis/LoopPass.h | 8 +- include/llvm/Analysis/MemoryBuiltins.h | 23 +- .../llvm/Analysis/MemoryDependenceAnalysis.h | 95 +- include/llvm/Analysis/PHITransAddr.h | 2 +- include/llvm/Analysis/Passes.h | 3 - include/llvm/Analysis/PathNumbering.h | 10 +- include/llvm/Analysis/PathProfileInfo.h | 6 +- include/llvm/Analysis/PostDominators.h | 4 +- include/llvm/Analysis/ProfileDataLoader.h | 1 + include/llvm/Analysis/ProfileInfo.h | 2 +- include/llvm/Analysis/ProfileInfoLoader.h | 2 +- include/llvm/Analysis/PtrUseVisitor.h | 285 + include/llvm/Analysis/RegionInfo.h | 4 +- include/llvm/Analysis/RegionIterator.h | 6 
+- include/llvm/Analysis/RegionPass.h | 11 +- include/llvm/Analysis/ScalarEvolution.h | 20 +- .../llvm/Analysis/ScalarEvolutionExpander.h | 14 +- .../Analysis/ScalarEvolutionExpressions.h | 151 +- .../Analysis/ScalarEvolutionNormalization.h | 4 +- include/llvm/Analysis/SparsePropagation.h | 8 +- include/llvm/Analysis/TargetTransformInfo.h | 349 + include/llvm/Analysis/Trace.h | 4 +- include/llvm/Analysis/ValueTracking.h | 22 +- include/llvm/Argument.h | 91 - include/llvm/Assembly/PrintModulePass.h | 6 + include/llvm/Attributes.h | 431 - include/llvm/Bitcode/Archive.h | 6 +- include/llvm/Bitcode/BitCodes.h | 27 +- include/llvm/Bitcode/BitstreamReader.h | 534 +- include/llvm/Bitcode/BitstreamWriter.h | 36 +- include/llvm/Bitcode/LLVMBitCodes.h | 31 +- include/llvm/Bitcode/ReaderWriter.h | 4 +- include/llvm/CMakeLists.txt | 8 +- include/llvm/CodeGen/Analysis.h | 12 +- include/llvm/CodeGen/AsmPrinter.h | 8 +- include/llvm/CodeGen/CalcSpillWeights.h | 2 +- include/llvm/CodeGen/CallingConvLower.h | 8 +- include/llvm/CodeGen/CommandFlags.h | 9 +- include/llvm/CodeGen/DAGCombine.h | 25 + include/llvm/CodeGen/DFAPacketizer.h | 4 +- include/llvm/CodeGen/FastISel.h | 20 +- include/llvm/CodeGen/FunctionLoweringInfo.h | 13 +- include/llvm/CodeGen/GCMetadata.h | 5 +- include/llvm/CodeGen/GCs.h | 6 + include/llvm/CodeGen/ISDOpcodes.h | 9 +- include/llvm/CodeGen/IntrinsicLowering.h | 2 +- include/llvm/CodeGen/JITCodeEmitter.h | 9 +- include/llvm/CodeGen/LatencyPriorityQueue.h | 4 +- include/llvm/CodeGen/LexicalScopes.h | 10 +- .../llvm/CodeGen/LinkAllAsmWriterComponents.h | 1 + .../llvm/CodeGen/LinkAllCodegenComponents.h | 3 +- include/llvm/CodeGen/LiveInterval.h | 72 +- include/llvm/CodeGen/LiveIntervalAnalysis.h | 62 +- .../llvm}/CodeGen/LiveIntervalUnion.h | 11 +- include/llvm/CodeGen/LiveRangeEdit.h | 2 +- {lib => include/llvm}/CodeGen/LiveRegMatrix.h | 2 +- include/llvm/CodeGen/LiveStackAnalysis.h | 8 +- include/llvm/CodeGen/LiveVariables.h | 10 +- 
include/llvm/CodeGen/MachORelocation.h | 6 +- include/llvm/CodeGen/MachineBasicBlock.h | 145 +- .../CodeGen/MachineBranchProbabilityInfo.h | 3 +- include/llvm/CodeGen/MachineCodeEmitter.h | 1 - include/llvm/CodeGen/MachineCodeInfo.h | 4 +- include/llvm/CodeGen/MachineDominators.h | 72 +- include/llvm/CodeGen/MachineFrameInfo.h | 36 +- include/llvm/CodeGen/MachineFunction.h | 37 +- .../llvm/CodeGen/MachineFunctionAnalysis.h | 6 +- include/llvm/CodeGen/MachineFunctionPass.h | 4 +- include/llvm/CodeGen/MachineInstr.h | 184 +- include/llvm/CodeGen/MachineInstrBuilder.h | 185 +- include/llvm/CodeGen/MachineInstrBundle.h | 27 +- include/llvm/CodeGen/MachineJumpTableInfo.h | 2 +- include/llvm/CodeGen/MachineLoopInfo.h | 6 +- include/llvm/CodeGen/MachineLoopRanges.h | 112 - include/llvm/CodeGen/MachineModuleInfo.h | 26 +- include/llvm/CodeGen/MachineOperand.h | 45 +- include/llvm/CodeGen/MachinePostDominators.h | 20 +- include/llvm/CodeGen/MachineRegisterInfo.h | 37 +- include/llvm/CodeGen/MachineScheduler.h | 70 +- .../llvm}/CodeGen/MachineTraceMetrics.h | 48 +- include/llvm/CodeGen/PBQP/Graph.h | 4 +- include/llvm/CodeGen/PBQP/HeuristicSolver.h | 2 +- include/llvm/CodeGen/PBQP/Heuristics/Briggs.h | 3 +- include/llvm/CodeGen/PBQP/Math.h | 4 +- include/llvm/CodeGen/PBQP/Solution.h | 3 +- include/llvm/CodeGen/Passes.h | 42 +- include/llvm/CodeGen/PseudoSourceValue.h | 2 +- include/llvm/CodeGen/RegAllocPBQP.h | 2 +- include/llvm/CodeGen/RegAllocRegistry.h | 4 +- include/llvm/CodeGen/RegisterClassInfo.h | 28 +- include/llvm/CodeGen/RegisterPressure.h | 77 +- include/llvm/CodeGen/RegisterScavenging.h | 93 +- include/llvm/CodeGen/ResourcePriorityQueue.h | 6 +- include/llvm/CodeGen/RuntimeLibcalls.h | 51 + include/llvm/CodeGen/ScheduleDAG.h | 98 +- include/llvm/CodeGen/ScheduleDAGILP.h | 86 - include/llvm/CodeGen/ScheduleDAGInstrs.h | 66 +- include/llvm/CodeGen/ScheduleDFS.h | 196 + include/llvm/CodeGen/SchedulerRegistry.h | 4 +- .../llvm/CodeGen/ScoreboardHazardRecognizer.h | 1 
- include/llvm/CodeGen/SelectionDAG.h | 41 +- include/llvm/CodeGen/SelectionDAGISel.h | 32 +- include/llvm/CodeGen/SelectionDAGNodes.h | 53 +- include/llvm/CodeGen/SlotIndexes.h | 75 +- .../CodeGen/TargetLoweringObjectFileImpl.h | 21 +- include/llvm/CodeGen/TargetSchedule.h | 13 +- include/llvm/CodeGen/ValueTypes.h | 182 +- include/llvm/CodeGen/ValueTypes.td | 66 +- {lib => include/llvm}/CodeGen/VirtRegMap.h | 14 +- include/llvm/Config/config.h.cmake | 23 +- include/llvm/Config/config.h.in | 25 +- include/llvm/Config/llvm-config.h.cmake | 9 + include/llvm/Config/llvm-config.h.in | 6 + include/llvm/DIBuilder.h | 132 +- include/llvm/DebugInfo.h | 352 +- include/llvm/DebugInfo/DIContext.h | 37 +- include/llvm/DefaultPasses.h | 168 - .../llvm/ExecutionEngine/ExecutionEngine.h | 12 +- include/llvm/ExecutionEngine/GenericValue.h | 21 +- include/llvm/ExecutionEngine/Interpreter.h | 4 +- include/llvm/ExecutionEngine/JIT.h | 4 +- .../llvm/ExecutionEngine/JITEventListener.h | 9 +- .../llvm/ExecutionEngine/JITMemoryManager.h | 5 +- include/llvm/ExecutionEngine/MCJIT.h | 4 +- .../llvm/ExecutionEngine/OProfileWrapper.h | 14 +- include/llvm/ExecutionEngine/ObjectBuffer.h | 160 +- include/llvm/ExecutionEngine/ObjectImage.h | 124 +- include/llvm/ExecutionEngine/RuntimeDyld.h | 45 +- .../ExecutionEngine/SectionMemoryManager.h | 176 + include/llvm/GVMaterializer.h | 4 +- include/llvm/IR/Argument.h | 96 + include/llvm/IR/Attributes.h | 499 + include/llvm/{ => IR}/BasicBlock.h | 205 +- include/llvm/IR/CMakeLists.txt | 7 + include/llvm/{ => IR}/CallingConv.h | 8 +- include/llvm/{ => IR}/Constant.h | 19 +- include/llvm/{ => IR}/Constants.h | 159 +- include/llvm/{ => IR}/DataLayout.h | 96 +- include/llvm/{ => IR}/DerivedTypes.h | 30 +- include/llvm/{ => IR}/Function.h | 131 +- include/llvm/{ => IR}/GlobalAlias.h | 10 +- include/llvm/{ => IR}/GlobalValue.h | 6 +- include/llvm/{ => IR}/GlobalVariable.h | 49 +- include/llvm/{ => IR}/IRBuilder.h | 346 +- include/llvm/{ => IR}/InlineAsm.h | 6 
+- include/llvm/{ => IR}/InstrTypes.h | 18 +- include/llvm/{ => IR}/Instruction.def | 0 include/llvm/{ => IR}/Instruction.h | 142 +- include/llvm/{ => IR}/Instructions.h | 268 +- include/llvm/{ => IR}/IntrinsicInst.h | 16 +- include/llvm/{ => IR}/Intrinsics.h | 12 +- include/llvm/{ => IR}/Intrinsics.td | 29 +- include/llvm/{ => IR}/IntrinsicsARM.td | 0 include/llvm/{ => IR}/IntrinsicsHexagon.td | 0 include/llvm/{ => IR}/IntrinsicsMips.td | 0 include/llvm/{ => IR}/IntrinsicsNVVM.td | 28 +- include/llvm/{ => IR}/IntrinsicsPowerPC.td | 3 +- include/llvm/IR/IntrinsicsR600.td | 36 + include/llvm/{ => IR}/IntrinsicsX86.td | 9 +- include/llvm/{ => IR}/IntrinsicsXCore.td | 0 include/llvm/{ => IR}/LLVMContext.h | 7 +- include/llvm/IR/MDBuilder.h | 186 + include/llvm/{ => IR}/Metadata.h | 48 +- include/llvm/{ => IR}/Module.h | 98 +- include/llvm/{ => IR}/OperandTraits.h | 6 +- include/llvm/{ => IR}/Operator.h | 188 +- include/llvm/{ => IR}/SymbolTableListTraits.h | 4 +- include/llvm/{ => IR}/Type.h | 18 +- include/llvm/{ => IR}/TypeBuilder.h | 8 +- include/llvm/{ => IR}/TypeFinder.h | 6 +- include/llvm/{ => IR}/Use.h | 13 +- include/llvm/{ => IR}/User.h | 26 +- include/llvm/{ => IR}/Value.h | 6 +- include/llvm/{ => IR}/ValueSymbolTable.h | 6 +- include/llvm/IRReader/IRReader.h | 55 + include/llvm/InitializePasses.h | 18 +- include/llvm/{Support => }/InstVisitor.h | 22 +- include/llvm/IntrinsicsCellSPU.td | 242 - include/llvm/{LinkAllVMCore.h => LinkAllIR.h} | 20 +- include/llvm/LinkAllPasses.h | 14 +- include/llvm/Linker.h | 168 +- include/llvm/MC/EDInstInfo.h | 29 - include/llvm/MC/MCAsmBackend.h | 7 +- include/llvm/MC/MCAsmInfo.h | 27 +- include/llvm/MC/MCAsmInfoCOFF.h | 6 +- include/llvm/MC/MCAsmInfoDarwin.h | 6 +- include/llvm/MC/MCAsmLayout.h | 27 +- include/llvm/MC/MCAssembler.h | 333 +- include/llvm/MC/MCAtom.h | 4 +- include/llvm/MC/MCCodeEmitter.h | 3 + include/llvm/MC/MCContext.h | 103 +- include/llvm/MC/MCDisassembler.h | 16 +- include/llvm/MC/MCDwarf.h | 151 +- 
{lib => include/llvm}/MC/MCELF.h | 2 + include/llvm/MC/MCELFObjectWriter.h | 1 - include/llvm/MC/MCELFStreamer.h | 125 + include/llvm/MC/MCExpr.h | 19 +- include/llvm/MC/MCFixedLenDisassembler.h | 4 +- include/llvm/MC/MCInstBuilder.h | 68 + include/llvm/MC/MCInstPrinter.h | 14 +- include/llvm/MC/MCInstrDesc.h | 100 +- include/llvm/MC/MCMachObjectWriter.h | 18 +- include/llvm/MC/MCObjectFileInfo.h | 82 +- include/llvm/MC/MCObjectStreamer.h | 26 +- include/llvm/MC/MCObjectWriter.h | 21 +- include/llvm/MC/MCParser/AsmCond.h | 4 +- include/llvm/MC/MCParser/AsmLexer.h | 4 +- include/llvm/MC/MCParser/MCAsmLexer.h | 14 +- include/llvm/MC/MCParser/MCAsmParser.h | 59 +- .../llvm/MC/MCParser/MCAsmParserExtension.h | 6 +- include/llvm/MC/MCParser/MCParsedAsmOperand.h | 17 +- include/llvm/MC/MCRegisterInfo.h | 97 +- include/llvm/MC/MCSchedule.h | 23 +- include/llvm/MC/MCSection.h | 6 + include/llvm/MC/MCSectionCOFF.h | 8 +- include/llvm/MC/MCSectionELF.h | 11 +- include/llvm/MC/MCSectionMachO.h | 10 +- include/llvm/MC/MCStreamer.h | 88 +- include/llvm/MC/MCSubtargetInfo.h | 2 +- include/llvm/MC/MCTargetAsmLexer.h | 89 - include/llvm/MC/MCTargetAsmParser.h | 10 + include/llvm/MC/MCValue.h | 2 +- include/llvm/MC/MCWinCOFFObjectWriter.h | 3 + include/llvm/MC/SubtargetFeature.h | 2 +- include/llvm/MDBuilder.h | 162 - include/llvm/Object/Archive.h | 114 +- include/llvm/Object/Binary.h | 8 +- include/llvm/Object/ELF.h | 1544 +- include/llvm/Object/MachO.h | 13 +- include/llvm/Object/MachOFormat.h | 20 +- include/llvm/Object/MachOObject.h | 5 +- include/llvm/Object/ObjectFile.h | 6 +- include/llvm/Object/RelocVisitor.h | 126 +- include/llvm/Option/Arg.h | 132 + include/llvm/Option/ArgList.h | 414 + include/llvm/Option/OptParser.td | 127 + include/llvm/Option/OptSpecifier.h | 39 + include/llvm/Option/OptTable.h | 161 + include/llvm/Option/Option.h | 193 + include/llvm/Pass.h | 31 +- include/llvm/PassAnalysisSupport.h | 6 +- include/llvm/PassManagers.h | 29 +- 
include/llvm/PassSupport.h | 8 +- include/llvm/Support/AlignOf.h | 182 +- include/llvm/Support/Allocator.h | 4 +- include/llvm/Support/ArrayRecycler.h | 143 + include/llvm/Support/Atomic.h | 4 +- include/llvm/Support/CFG.h | 20 +- include/llvm/Support/COFF.h | 7 +- include/llvm/Support/CallSite.h | 19 +- include/llvm/Support/Casting.h | 64 +- include/llvm/Support/CommandLine.h | 12 +- include/llvm/Support/Compiler.h | 189 +- include/llvm/Support/ConstantFolder.h | 4 +- include/llvm/Support/ConstantRange.h | 4 +- include/llvm/Support/ConvertUTF.h | 228 + include/llvm/Support/DOTGraphTraits.h | 5 + include/llvm/Support/DataExtractor.h | 20 +- include/llvm/Support/DataFlow.h | 2 +- include/llvm/Support/DataStream.h | 4 +- include/llvm/Support/DebugLoc.h | 28 +- include/llvm/Support/Dwarf.h | 46 +- include/llvm/Support/DynamicLibrary.h | 4 +- include/llvm/Support/ELF.h | 202 +- include/llvm/Support/Endian.h | 165 +- include/llvm/Support/Errno.h | 4 +- include/llvm/Support/ErrorHandling.h | 17 +- include/llvm/Support/ErrorOr.h | 514 + include/llvm/Support/FEnv.h | 8 +- include/llvm/Support/FileOutputBuffer.h | 58 +- include/llvm/Support/FileSystem.h | 12 +- include/llvm/Support/FormattedStream.h | 208 +- include/llvm/Support/GCOV.h | 4 +- .../llvm/Support/GetElementPtrTypeIterator.h | 16 +- include/llvm/Support/GraphWriter.h | 18 +- include/llvm/Support/Host.h | 8 +- include/llvm/Support/IRReader.h | 112 - include/llvm/Support/IncludeFile.h | 4 +- include/llvm/Support/InstIterator.h | 4 +- include/llvm/Support/IntegersSubset.h | 13 +- include/llvm/Support/IntegersSubsetMapping.h | 6 +- include/llvm/Support/LEB128.h | 4 +- include/llvm/Support/Locale.h | 6 +- include/llvm/Support/LockFileManager.h | 1 + include/llvm/Support/MathExtras.h | 22 +- include/llvm/Support/Memory.h | 4 +- include/llvm/Support/MemoryObject.h | 4 +- include/llvm/Support/Mutex.h | 4 +- include/llvm/Support/NoFolder.h | 4 +- include/llvm/Support/PassNameParser.h | 6 +- include/llvm/Support/PathV1.h 
| 4 +- include/llvm/Support/PatternMatch.h | 312 +- include/llvm/Support/PredIteratorCache.h | 4 +- include/llvm/Support/Process.h | 328 +- include/llvm/Support/Program.h | 23 +- include/llvm/Support/Recycler.h | 11 + include/llvm/Support/Regex.h | 15 +- include/llvm/Support/RegistryParser.h | 6 +- include/llvm/Support/SMLoc.h | 14 +- include/llvm/Support/SaveAndRestore.h | 4 +- include/llvm/Support/Signals.h | 8 +- include/llvm/Support/Solaris.h | 4 +- include/llvm/Support/SourceMgr.h | 80 +- include/llvm/Support/StreamableMemoryObject.h | 6 +- include/llvm/Support/StringPool.h | 2 +- include/llvm/Support/SwapByteOrder.h | 4 +- include/llvm/Support/TargetFolder.h | 4 +- include/llvm/Support/TargetRegistry.h | 60 +- include/llvm/Support/ThreadLocal.h | 6 +- include/llvm/Support/Threading.h | 4 +- include/llvm/Support/TimeValue.h | 24 +- include/llvm/Support/Timer.h | 13 +- include/llvm/Support/ToolOutputFile.h | 4 +- include/llvm/Support/Valgrind.h | 2 +- include/llvm/Support/ValueHandle.h | 47 +- include/llvm/Support/Watchdog.h | 38 + include/llvm/Support/Win64EH.h | 89 +- include/llvm/Support/YAMLParser.h | 11 +- include/llvm/Support/YAMLTraits.h | 1104 + include/llvm/Support/circular_raw_ostream.h | 4 +- include/llvm/Support/raw_ostream.h | 1 - include/llvm/Support/system_error.h | 4 +- include/llvm/Support/type_traits.h | 24 + include/llvm/TableGen/Error.h | 1 + include/llvm/TableGen/Record.h | 117 +- include/llvm/TableGen/StringMatcher.h | 8 +- include/llvm/Target/CostTable.h | 64 + include/llvm/Target/Mangler.h | 6 +- include/llvm/Target/Target.td | 10 +- include/llvm/Target/TargetFrameLowering.h | 34 +- include/llvm/Target/TargetInstrInfo.h | 161 +- include/llvm/Target/TargetJITInfo.h | 2 +- include/llvm/Target/TargetLibraryInfo.h | 279 +- include/llvm/Target/TargetLowering.h | 1492 +- .../llvm/Target/TargetLoweringObjectFile.h | 17 +- include/llvm/Target/TargetMachine.h | 31 +- include/llvm/Target/TargetOptions.h | 18 +- 
include/llvm/Target/TargetRegisterInfo.h | 104 +- include/llvm/Target/TargetSchedule.td | 6 + include/llvm/Target/TargetSubtargetInfo.h | 11 + include/llvm/Target/TargetTransformImpl.h | 98 - include/llvm/TargetTransformInfo.h | 204 - include/llvm/Transforms/IPO.h | 4 +- include/llvm/Transforms/IPO/InlinerPass.h | 3 +- .../llvm/Transforms/IPO/PassManagerBuilder.h | 4 +- include/llvm/Transforms/Instrumentation.h | 51 +- include/llvm/Transforms/ObjCARC.h | 49 + include/llvm/Transforms/Scalar.h | 32 +- .../llvm/Transforms/Utils/AddrModeMatcher.h | 109 - .../llvm/Transforms/Utils/BasicBlockUtils.h | 7 +- .../llvm/Transforms/Utils}/BlackList.h | 11 +- include/llvm/Transforms/Utils/BuildLibCalls.h | 8 +- .../Transforms/Utils/BypassSlowDivision.h | 7 +- include/llvm/Transforms/Utils/Cloning.h | 2 +- .../llvm/Transforms/Utils/CmpInstAnalysis.h | 2 +- .../llvm/Transforms/Utils/IntegerDivision.h | 18 +- include/llvm/Transforms/Utils/Local.h | 20 +- include/llvm/Transforms/Utils/ModuleUtils.h | 6 +- .../llvm/Transforms/Utils/PromoteMemToReg.h | 4 +- include/llvm/Transforms/Utils/SSAUpdater.h | 1 + .../llvm/Transforms/Utils/SimplifyLibCalls.h | 3 +- include/llvm/Transforms/Vectorize.h | 6 +- lib/Analysis/AliasAnalysis.cpp | 70 +- lib/Analysis/AliasAnalysisCounter.cpp | 2 +- lib/Analysis/AliasAnalysisEvaluator.cpp | 86 +- lib/Analysis/AliasDebugger.cpp | 10 +- lib/Analysis/AliasSetTracker.cpp | 12 +- lib/Analysis/Analysis.cpp | 4 +- lib/Analysis/BasicAliasAnalysis.cpp | 95 +- lib/Analysis/BlockFrequencyInfo.cpp | 6 +- lib/Analysis/BranchProbabilityInfo.cpp | 12 +- lib/Analysis/CFGPrinter.cpp | 1 - lib/Analysis/CMakeLists.txt | 4 +- lib/Analysis/CaptureTracking.cpp | 5 + lib/Analysis/CodeMetrics.cpp | 144 +- lib/Analysis/ConstantFolding.cpp | 256 +- lib/Analysis/CostModel.cpp | 122 +- lib/Analysis/DbgInfoPrinter.cpp | 224 - lib/Analysis/DependenceAnalysis.cpp | 302 +- lib/Analysis/DominanceFrontier.cpp | 2 +- lib/Analysis/IPA/CMakeLists.txt | 2 + lib/Analysis/IPA/CallGraph.cpp | 
6 +- lib/Analysis/IPA/CallGraphSCCPass.cpp | 13 +- lib/Analysis/IPA/CallPrinter.cpp | 87 + lib/Analysis/IPA/FindUsedTypes.cpp | 6 +- lib/Analysis/IPA/GlobalsModRef.cpp | 16 +- lib/Analysis/IPA/IPA.cpp | 2 + lib/Analysis/{ => IPA}/InlineCost.cpp | 424 +- lib/Analysis/IVUsers.cpp | 12 +- lib/Analysis/InstCount.cpp | 10 +- lib/Analysis/InstructionSimplify.cpp | 506 +- lib/Analysis/Interval.cpp | 2 +- lib/Analysis/LazyValueInfo.cpp | 19 +- lib/Analysis/LibCallAliasAnalysis.cpp | 4 +- lib/Analysis/LibCallSemantics.cpp | 2 +- lib/Analysis/Lint.cpp | 100 +- lib/Analysis/Loads.cpp | 57 +- lib/Analysis/LoopInfo.cpp | 74 +- lib/Analysis/MemDepPrinter.cpp | 14 +- lib/Analysis/MemoryBuiltins.cpp | 131 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 324 +- lib/Analysis/ModuleDebugInfoPrinter.cpp | 4 +- lib/Analysis/NoAliasAnalysis.cpp | 4 +- lib/Analysis/PHITransAddr.cpp | 6 +- lib/Analysis/PathNumbering.cpp | 15 +- lib/Analysis/PathProfileInfo.cpp | 7 +- lib/Analysis/PathProfileVerifier.cpp | 13 +- lib/Analysis/PostDominators.cpp | 8 +- lib/Analysis/ProfileDataLoader.cpp | 6 +- lib/Analysis/ProfileDataLoaderPass.cpp | 28 +- lib/Analysis/ProfileEstimatorPass.cpp | 6 +- lib/Analysis/ProfileInfo.cpp | 16 +- lib/Analysis/ProfileInfoLoader.cpp | 4 +- lib/Analysis/ProfileInfoLoaderPass.cpp | 16 +- lib/Analysis/ProfileVerifierPass.cpp | 15 +- lib/Analysis/PtrUseVisitor.cpp | 36 + lib/Analysis/RegionInfo.cpp | 7 +- lib/Analysis/RegionPrinter.cpp | 12 +- lib/Analysis/ScalarEvolution.cpp | 61 +- lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 2 +- lib/Analysis/ScalarEvolutionExpander.cpp | 25 +- lib/Analysis/SparsePropagation.cpp | 6 +- lib/Analysis/TargetTransformInfo.cpp | 558 + lib/Analysis/Trace.cpp | 2 +- lib/Analysis/TypeBasedAliasAnalysis.cpp | 10 +- lib/Analysis/ValueTracking.cpp | 390 +- lib/Archive/Archive.cpp | 5 +- lib/Archive/ArchiveInternals.h | 3 +- lib/Archive/ArchiveReader.cpp | 27 +- lib/Archive/ArchiveWriter.cpp | 5 +- lib/AsmParser/LLLexer.cpp | 195 +- 
lib/AsmParser/LLLexer.h | 3 +- lib/AsmParser/LLParser.cpp | 686 +- lib/AsmParser/LLParser.h | 71 +- lib/AsmParser/LLToken.h | 56 +- lib/AsmParser/Parser.cpp | 4 +- lib/Bitcode/Reader/BitReader.cpp | 14 +- lib/Bitcode/Reader/BitcodeReader.cpp | 690 +- lib/Bitcode/Reader/BitcodeReader.h | 70 +- lib/Bitcode/Reader/BitstreamReader.cpp | 371 + lib/Bitcode/Reader/CMakeLists.txt | 1 + lib/Bitcode/Writer/BitWriter.cpp | 9 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 210 +- lib/Bitcode/Writer/BitcodeWriterPass.cpp | 4 +- lib/Bitcode/Writer/ValueEnumerator.cpp | 64 +- lib/Bitcode/Writer/ValueEnumerator.h | 52 +- lib/CMakeLists.txt | 4 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 27 +- lib/CodeGen/AggressiveAntiDepBreaker.h | 6 +- lib/CodeGen/AllocationOrder.cpp | 76 +- lib/CodeGen/AllocationOrder.h | 65 +- lib/CodeGen/Analysis.cpp | 53 +- lib/CodeGen/AsmPrinter/ARMException.cpp | 90 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 214 +- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 56 +- .../AsmPrinter/AsmPrinterInlineAsm.cpp | 16 +- lib/CodeGen/AsmPrinter/CMakeLists.txt | 1 + lib/CodeGen/AsmPrinter/DIE.cpp | 32 +- lib/CodeGen/AsmPrinter/DIE.h | 26 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 18 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 32 +- lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 25 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 474 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 74 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1305 +- lib/CodeGen/AsmPrinter/DwarfDebug.h | 504 +- lib/CodeGen/AsmPrinter/DwarfException.cpp | 45 +- lib/CodeGen/AsmPrinter/DwarfException.h | 3 + lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 120 + lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 18 +- lib/CodeGen/AsmPrinter/Win64Exception.cpp | 23 +- lib/CodeGen/BasicTargetTransformInfo.cpp | 466 + lib/CodeGen/BranchFolding.cpp | 23 +- lib/CodeGen/CMakeLists.txt | 22 +- lib/CodeGen/CallingConvLower.cpp | 18 +- lib/CodeGen/CodeGen.cpp | 2 +- lib/CodeGen/CodePlacementOpt.cpp | 422 - 
lib/CodeGen/CriticalAntiDepBreaker.cpp | 58 +- lib/CodeGen/CriticalAntiDepBreaker.h | 5 +- lib/CodeGen/DFAPacketizer.cpp | 4 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 13 +- lib/CodeGen/DwarfEHPrepare.cpp | 14 +- lib/CodeGen/EarlyIfConversion.cpp | 10 +- lib/CodeGen/ErlangGC.cpp | 81 + lib/CodeGen/ExecutionDepsFix.cpp | 8 +- lib/CodeGen/ExpandISelPseudos.cpp | 4 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 45 +- lib/CodeGen/GCMetadata.cpp | 45 +- lib/CodeGen/GCStrategy.cpp | 14 +- lib/CodeGen/IfConversion.cpp | 28 +- lib/CodeGen/InlineSpiller.cpp | 8 +- lib/CodeGen/InterferenceCache.cpp | 4 +- lib/CodeGen/InterferenceCache.h | 2 +- lib/CodeGen/IntrinsicLowering.cpp | 12 +- lib/CodeGen/LLVMBuild.txt | 2 +- lib/CodeGen/LLVMTargetMachine.cpp | 37 +- lib/CodeGen/LexicalScopes.cpp | 18 +- lib/CodeGen/LiveDebugVariables.cpp | 77 +- lib/CodeGen/LiveInterval.cpp | 372 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 513 +- lib/CodeGen/LiveIntervalUnion.cpp | 31 +- lib/CodeGen/LiveRangeCalc.cpp | 94 +- lib/CodeGen/LiveRangeCalc.h | 29 +- lib/CodeGen/LiveRangeEdit.cpp | 8 +- lib/CodeGen/LiveRegMatrix.cpp | 10 +- lib/CodeGen/LiveStackAnalysis.cpp | 4 +- lib/CodeGen/LiveVariables.cpp | 33 +- lib/CodeGen/LocalStackSlotAllocation.cpp | 20 +- lib/CodeGen/MachineBasicBlock.cpp | 214 +- lib/CodeGen/MachineBlockFrequencyInfo.cpp | 6 +- lib/CodeGen/MachineBlockPlacement.cpp | 18 +- lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2 +- lib/CodeGen/MachineCSE.cpp | 12 +- lib/CodeGen/MachineCopyPropagation.cpp | 37 +- lib/CodeGen/MachineFunction.cpp | 173 +- lib/CodeGen/MachineFunctionPass.cpp | 2 +- lib/CodeGen/MachineFunctionPrinterPass.cpp | 4 +- lib/CodeGen/MachineInstr.cpp | 464 +- lib/CodeGen/MachineInstrBundle.cpp | 28 +- lib/CodeGen/MachineLICM.cpp | 20 +- lib/CodeGen/MachineLoopInfo.cpp | 2 +- lib/CodeGen/MachineLoopRanges.cpp | 116 - lib/CodeGen/MachineModuleInfo.cpp | 58 +- lib/CodeGen/MachineRegisterInfo.cpp | 63 +- lib/CodeGen/MachineSSAUpdater.cpp | 22 +- 
lib/CodeGen/MachineScheduler.cpp | 690 +- lib/CodeGen/MachineSink.cpp | 14 +- lib/CodeGen/MachineTraceMetrics.cpp | 175 +- lib/CodeGen/MachineVerifier.cpp | 49 +- lib/CodeGen/OptimizePHIs.cpp | 6 +- lib/CodeGen/PHIElimination.cpp | 270 +- lib/CodeGen/PHIEliminationUtils.cpp | 2 +- lib/CodeGen/Passes.cpp | 77 +- lib/CodeGen/PeepholeOptimizer.cpp | 23 +- lib/CodeGen/PostRASchedulerList.cpp | 70 +- lib/CodeGen/PrologEpilogInserter.cpp | 207 +- lib/CodeGen/PrologEpilogInserter.h | 6 +- lib/CodeGen/PseudoSourceValue.cpp | 8 +- lib/CodeGen/RegAllocBase.cpp | 4 +- lib/CodeGen/RegAllocBase.h | 4 +- lib/CodeGen/RegAllocBasic.cpp | 16 +- lib/CodeGen/RegAllocFast.cpp | 147 +- lib/CodeGen/RegAllocGreedy.cpp | 39 +- lib/CodeGen/RegAllocPBQP.cpp | 12 +- lib/CodeGen/RegisterClassInfo.cpp | 34 +- lib/CodeGen/RegisterCoalescer.cpp | 329 +- lib/CodeGen/RegisterPressure.cpp | 547 +- lib/CodeGen/RegisterScavenging.cpp | 138 +- lib/CodeGen/ScheduleDAG.cpp | 115 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 723 +- lib/CodeGen/ScheduleDAGPrinter.cpp | 16 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 660 +- lib/CodeGen/SelectionDAG/FastISel.cpp | 80 +- .../SelectionDAG/FunctionLoweringInfo.cpp | 35 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 168 +- lib/CodeGen/SelectionDAG/InstrEmitter.h | 16 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 299 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 568 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 81 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 73 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 29 +- .../SelectionDAG/LegalizeTypesGeneric.cpp | 2 +- .../SelectionDAG/LegalizeVectorOps.cpp | 190 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 78 +- .../SelectionDAG/ResourcePriorityQueue.cpp | 26 +- lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 2 +- lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 6 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 18 +- .../SelectionDAG/ScheduleDAGRRList.cpp | 149 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 22 +- 
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6 +- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 18 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 425 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 697 +- .../SelectionDAG/SelectionDAGBuilder.h | 15 +- .../SelectionDAG/SelectionDAGDumper.cpp | 19 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 233 +- .../SelectionDAG/SelectionDAGPrinter.cpp | 16 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1304 +- lib/CodeGen/ShadowStackGC.cpp | 8 +- lib/CodeGen/ShrinkWrapping.cpp | 19 +- lib/CodeGen/SjLjEHPrepare.cpp | 39 +- lib/CodeGen/SlotIndexes.cpp | 70 + lib/CodeGen/SpillPlacement.cpp | 1 + lib/CodeGen/Spiller.cpp | 6 +- lib/CodeGen/SplitKit.cpp | 2 +- lib/CodeGen/StackColoring.cpp | 175 +- lib/CodeGen/StackProtector.cpp | 147 +- lib/CodeGen/StackSlotColoring.cpp | 10 +- lib/CodeGen/StrongPHIElimination.cpp | 8 +- lib/CodeGen/TailDuplication.cpp | 29 +- lib/CodeGen/TargetFrameLoweringImpl.cpp | 3 +- ...tInstrInfoImpl.cpp => TargetInstrInfo.cpp} | 188 +- lib/CodeGen/TargetLoweringBase.cpp | 1305 + lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 152 +- .../TargetRegisterInfo.cpp | 39 +- lib/CodeGen/TargetSchedule.cpp | 9 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 611 +- lib/CodeGen/UnreachableBlockElim.cpp | 16 +- lib/CodeGen/VirtRegMap.cpp | 37 +- lib/DebugInfo/CMakeLists.txt | 1 + lib/DebugInfo/DIContext.cpp | 13 +- .../DWARFAbbreviationDeclaration.cpp | 18 +- lib/DebugInfo/DWARFAbbreviationDeclaration.h | 10 +- lib/DebugInfo/DWARFCompileUnit.cpp | 15 +- lib/DebugInfo/DWARFCompileUnit.h | 27 +- lib/DebugInfo/DWARFContext.cpp | 385 +- lib/DebugInfo/DWARFContext.h | 117 +- lib/DebugInfo/DWARFDebugArangeSet.cpp | 32 +- lib/DebugInfo/DWARFDebugArangeSet.h | 8 +- lib/DebugInfo/DWARFDebugAranges.cpp | 49 +- lib/DebugInfo/DWARFDebugAranges.h | 8 +- lib/DebugInfo/DWARFDebugFrame.cpp | 391 + lib/DebugInfo/DWARFDebugFrame.h | 46 + lib/DebugInfo/DWARFDebugInfoEntry.cpp | 30 +- lib/DebugInfo/DWARFDebugLine.cpp | 92 +- 
lib/DebugInfo/DWARFDebugLine.h | 8 + lib/DebugInfo/DWARFFormValue.cpp | 95 +- lib/DebugInfo/DWARFFormValue.h | 4 + lib/DebugInfo/DWARFRelocMap.h | 22 + lib/ExecutionEngine/EventListenerCommon.h | 6 +- lib/ExecutionEngine/ExecutionEngine.cpp | 197 +- .../ExecutionEngineBindings.cpp | 2 +- .../IntelJITEvents/IntelJITEventListener.cpp | 115 +- .../IntelJITEvents/IntelJITEventsWrapper.h | 6 - lib/ExecutionEngine/Interpreter/Execution.cpp | 53 +- .../Interpreter/ExternalFunctions.cpp | 12 +- .../Interpreter/Interpreter.cpp | 4 +- lib/ExecutionEngine/Interpreter/Interpreter.h | 7 +- lib/ExecutionEngine/JIT/JIT.cpp | 23 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 10 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 4 + lib/ExecutionEngine/JIT/JITEmitter.cpp | 80 +- lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 30 +- lib/ExecutionEngine/MCJIT/CMakeLists.txt | 1 + lib/ExecutionEngine/MCJIT/LLVMBuild.txt | 2 +- lib/ExecutionEngine/MCJIT/MCJIT.cpp | 17 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 2 +- .../MCJIT/SectionMemoryManager.cpp | 226 + .../OProfileJIT/OProfileJITEventListener.cpp | 2 +- .../OProfileJIT/OProfileWrapper.cpp | 1 + .../RuntimeDyld/GDBRegistrar.cpp | 8 +- .../RuntimeDyld/ObjectImageCommon.h | 154 +- .../RuntimeDyld/RuntimeDyld.cpp | 49 +- .../RuntimeDyld/RuntimeDyldELF.cpp | 128 +- .../RuntimeDyld/RuntimeDyldImpl.h | 4 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 7 +- .../RuntimeDyld/RuntimeDyldMachO.h | 2 +- lib/ExecutionEngine/TargetSelect.cpp | 9 +- lib/{VMCore => IR}/AsmWriter.cpp | 258 +- lib/IR/AttributeImpl.h | 278 + lib/IR/Attributes.cpp | 1180 + lib/{VMCore => IR}/AutoUpgrade.cpp | 14 +- lib/{VMCore => IR}/BasicBlock.cpp | 16 +- lib/{VMCore => IR}/CMakeLists.txt | 1 - lib/{VMCore => IR}/ConstantFold.cpp | 74 +- lib/{VMCore => IR}/ConstantFold.h | 0 lib/{VMCore => IR}/Constants.cpp | 200 +- lib/{VMCore => IR}/ConstantsContext.h | 6 +- lib/{VMCore => IR}/Core.cpp | 118 +- lib/{VMCore => IR}/DIBuilder.cpp | 386 +- lib/{VMCore => IR}/DataLayout.cpp | 274 +- 
lib/{VMCore => IR}/DebugInfo.cpp | 395 +- lib/{VMCore => IR}/DebugLoc.cpp | 4 +- lib/{VMCore => IR}/Dominators.cpp | 10 +- lib/{VMCore => IR}/Function.cpp | 211 +- lib/{VMCore => IR}/GCOV.cpp | 0 lib/{VMCore => IR}/GVMaterializer.cpp | 0 lib/{VMCore => IR}/Globals.cpp | 28 +- lib/{VMCore => IR}/IRBuilder.cpp | 10 +- lib/{VMCore => IR}/InlineAsm.cpp | 8 +- lib/{VMCore => IR}/Instruction.cpp | 128 +- lib/{VMCore => IR}/Instructions.cpp | 119 +- lib/{VMCore => IR}/IntrinsicInst.cpp | 8 +- lib/{VMCore => IR}/LLVMBuild.txt | 2 +- lib/{VMCore => IR}/LLVMContext.cpp | 22 +- lib/{VMCore => IR}/LLVMContextImpl.cpp | 19 +- lib/{VMCore => IR}/LLVMContextImpl.h | 34 +- lib/{VMCore => IR}/LeakDetector.cpp | 4 +- lib/{VMCore => IR}/LeaksContext.h | 2 +- lib/{VMCore => IR}/Makefile | 16 +- lib/{VMCore => IR}/Metadata.cpp | 17 +- lib/{VMCore => IR}/Module.cpp | 52 +- lib/{VMCore => IR}/Pass.cpp | 25 +- lib/{VMCore => IR}/PassManager.cpp | 107 +- lib/{VMCore => IR}/PassRegistry.cpp | 8 +- lib/{VMCore => IR}/PrintModulePass.cpp | 43 +- .../SymbolTableListTraitsImpl.h | 4 +- lib/{VMCore => IR}/Type.cpp | 35 +- lib/{VMCore => IR}/TypeFinder.cpp | 14 +- lib/{VMCore => IR}/Use.cpp | 4 +- lib/{VMCore => IR}/User.cpp | 8 +- lib/{VMCore => IR}/Value.cpp | 37 +- lib/{VMCore => IR}/ValueSymbolTable.cpp | 8 +- lib/{VMCore => IR}/ValueTypes.cpp | 47 +- lib/{VMCore => IR}/Verifier.cpp | 389 +- lib/IRReader/CMakeLists.txt | 3 + lib/IRReader/IRReader.cpp | 89 + .../MCTargetDesc => IRReader}/LLVMBuild.txt | 9 +- lib/IRReader/Makefile | 14 + lib/LLVMBuild.txt | 2 +- lib/Linker/CMakeLists.txt | 2 - lib/Linker/LLVMBuild.txt | 2 +- lib/Linker/LinkArchives.cpp | 197 - lib/Linker/LinkItems.cpp | 241 - lib/Linker/LinkModules.cpp | 301 +- lib/Linker/Linker.cpp | 113 +- lib/MC/CMakeLists.txt | 1 - lib/MC/ELFObjectWriter.cpp | 81 +- lib/MC/MCAsmInfo.cpp | 3 + lib/MC/MCAsmStreamer.cpp | 106 +- lib/MC/MCAssembler.cpp | 431 +- lib/MC/MCContext.cpp | 82 +- lib/MC/MCDisassembler/CMakeLists.txt | 5 - 
lib/MC/MCDisassembler/Disassembler.cpp | 87 +- lib/MC/MCDisassembler/Disassembler.h | 6 +- lib/MC/MCDisassembler/EDDisassembler.cpp | 400 - lib/MC/MCDisassembler/EDDisassembler.h | 271 - lib/MC/MCDisassembler/EDInfo.h | 84 - lib/MC/MCDisassembler/EDInst.cpp | 212 - lib/MC/MCDisassembler/EDInst.h | 182 - lib/MC/MCDisassembler/EDMain.cpp | 276 - lib/MC/MCDisassembler/EDOperand.cpp | 315 - lib/MC/MCDisassembler/EDOperand.h | 91 - lib/MC/MCDisassembler/EDToken.cpp | 214 - lib/MC/MCDisassembler/EDToken.h | 139 - lib/MC/MCDwarf.cpp | 296 +- lib/MC/MCELF.cpp | 17 +- lib/MC/MCELFObjectTargetWriter.cpp | 5 - lib/MC/MCELFStreamer.cpp | 341 +- lib/MC/MCExpr.cpp | 23 +- lib/MC/MCInstPrinter.cpp | 13 +- lib/MC/MCMachOStreamer.cpp | 52 +- lib/MC/MCNullStreamer.cpp | 21 +- lib/MC/MCObjectFileInfo.cpp | 79 +- lib/MC/MCObjectStreamer.cpp | 99 +- lib/MC/MCParser/AsmLexer.cpp | 84 +- lib/MC/MCParser/AsmParser.cpp | 2913 +- lib/MC/MCParser/COFFAsmParser.cpp | 83 +- lib/MC/MCParser/DarwinAsmParser.cpp | 249 +- lib/MC/MCParser/ELFAsmParser.cpp | 103 +- lib/MC/MCParser/MCAsmLexer.cpp | 2 +- lib/MC/MCParser/MCAsmParser.cpp | 8 +- lib/MC/MCPureStreamer.cpp | 36 +- lib/MC/MCSection.cpp | 2 +- lib/MC/MCSectionMachO.cpp | 4 +- lib/MC/MCStreamer.cpp | 159 +- lib/MC/MCSubtargetInfo.cpp | 4 +- lib/MC/MCWin64EH.cpp | 12 +- lib/MC/MachObjectWriter.cpp | 72 +- lib/MC/WinCOFFObjectWriter.cpp | 75 +- lib/MC/WinCOFFStreamer.cpp | 134 +- lib/Makefile | 5 +- lib/Object/Archive.cpp | 279 +- lib/Object/COFFObjectFile.cpp | 2 +- lib/Object/ELFObjectFile.cpp | 46 +- lib/Object/MachOObject.cpp | 18 +- lib/Object/MachOObjectFile.cpp | 106 +- lib/Object/ObjectFile.cpp | 4 +- lib/Option/Arg.cpp | 122 + lib/Option/ArgList.cpp | 385 + lib/Option/CMakeLists.txt | 8 + lib/Option/LLVMBuild.txt | 22 + lib/Option/Makefile | 14 + lib/Option/OptTable.cpp | 387 + lib/Option/Option.cpp | 202 + lib/Support/APFloat.cpp | 96 +- lib/Support/APInt.cpp | 52 +- lib/Support/Allocator.cpp | 10 +- lib/Support/CMakeLists.txt | 6 + 
lib/Support/CommandLine.cpp | 24 +- lib/Support/ConstantRange.cpp | 2 +- lib/Support/ConvertUTF.c | 571 + lib/Support/ConvertUTFWrapper.cpp | 76 + lib/Support/CrashRecoveryContext.cpp | 4 +- lib/Support/DataStream.cpp | 4 +- lib/Support/Debug.cpp | 6 +- lib/Support/Disassembler.cpp | 5 +- lib/Support/Dwarf.cpp | 19 +- lib/Support/DynamicLibrary.cpp | 8 +- lib/Support/ErrorHandling.cpp | 26 +- lib/Support/FileOutputBuffer.cpp | 83 +- lib/Support/FileUtilities.cpp | 14 +- lib/Support/FoldingSet.cpp | 10 +- lib/Support/GraphWriter.cpp | 15 +- lib/Support/Host.cpp | 107 +- lib/Support/LocaleWindows.inc | 2 +- lib/Support/LocaleXlocale.inc | 2 +- lib/Support/LockFileManager.cpp | 22 +- lib/Support/Memory.cpp | 2 +- lib/Support/MemoryBuffer.cpp | 107 +- lib/Support/Path.cpp | 3 +- lib/Support/PathV2.cpp | 29 +- lib/Support/PluginLoader.cpp | 4 +- lib/Support/PrettyStackTrace.cpp | 12 +- lib/Support/Process.cpp | 60 +- lib/Support/Program.cpp | 11 +- lib/Support/Regex.cpp | 8 +- lib/Support/SmallPtrSet.cpp | 24 +- lib/Support/SourceMgr.cpp | 197 +- lib/Support/Statistic.cpp | 22 +- lib/Support/StringRef.cpp | 3 +- lib/Support/Threading.cpp | 2 +- lib/Support/TimeValue.cpp | 9 +- lib/Support/Timer.cpp | 14 +- lib/Support/Triple.cpp | 24 +- lib/Support/Unix/Memory.inc | 36 +- lib/Support/Unix/PathV2.inc | 43 +- lib/Support/Unix/Process.inc | 101 +- lib/Support/Unix/Program.inc | 32 +- lib/Support/Unix/Signals.inc | 44 +- lib/Support/Unix/TimeValue.inc | 3 +- lib/Support/Unix/Unix.h | 10 +- lib/Support/Unix/Watchdog.inc | 32 + lib/Support/Watchdog.cpp | 23 + lib/Support/Windows/Memory.inc | 2 + lib/Support/Windows/Path.inc | 4 +- lib/Support/Windows/PathV2.inc | 65 +- lib/Support/Windows/Process.inc | 87 +- lib/Support/Windows/Program.inc | 26 +- lib/Support/Windows/Signals.inc | 6 +- lib/Support/Windows/Watchdog.inc | 24 + lib/Support/YAMLParser.cpp | 27 +- lib/Support/YAMLTraits.cpp | 827 + lib/Support/raw_ostream.cpp | 22 +- lib/Support/regcomp.c | 30 +- 
lib/Support/system_error.cpp | 2 +- lib/TableGen/Error.cpp | 7 +- lib/TableGen/Main.cpp | 15 +- lib/TableGen/Record.cpp | 66 +- lib/TableGen/TGLexer.cpp | 22 +- lib/TableGen/TGLexer.h | 17 +- lib/TableGen/TGParser.cpp | 255 +- lib/TableGen/TGParser.h | 10 +- lib/TableGen/TableGenBackend.cpp | 31 +- lib/Target/AArch64/AArch64.h | 42 + lib/Target/AArch64/AArch64.td | 70 + lib/Target/AArch64/AArch64AsmPrinter.cpp | 347 + lib/Target/AArch64/AArch64AsmPrinter.h | 80 + lib/Target/AArch64/AArch64BranchFixupPass.cpp | 600 + lib/Target/AArch64/AArch64CallingConv.td | 196 + lib/Target/AArch64/AArch64FrameLowering.cpp | 633 + lib/Target/AArch64/AArch64FrameLowering.h | 108 + lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 415 + lib/Target/AArch64/AArch64ISelLowering.cpp | 2975 + lib/Target/AArch64/AArch64ISelLowering.h | 247 + lib/Target/AArch64/AArch64InstrFormats.td | 961 + lib/Target/AArch64/AArch64InstrInfo.cpp | 822 + lib/Target/AArch64/AArch64InstrInfo.h | 112 + lib/Target/AArch64/AArch64InstrInfo.td | 5099 ++ lib/Target/AArch64/AArch64MCInstLower.cpp | 140 + .../AArch64MachineFunctionInfo.cpp} | 15 +- .../AArch64/AArch64MachineFunctionInfo.h | 149 + lib/Target/AArch64/AArch64RegisterInfo.cpp | 171 + lib/Target/AArch64/AArch64RegisterInfo.h | 76 + lib/Target/AArch64/AArch64RegisterInfo.td | 203 + .../AArch64Schedule.td} | 8 +- .../AArch64/AArch64SelectionDAGInfo.cpp | 25 + .../AArch64SelectionDAGInfo.h} | 17 +- lib/Target/AArch64/AArch64Subtarget.cpp | 43 + lib/Target/AArch64/AArch64Subtarget.h | 54 + lib/Target/AArch64/AArch64TargetMachine.cpp | 81 + lib/Target/AArch64/AArch64TargetMachine.h | 69 + .../AArch64/AArch64TargetObjectFile.cpp | 24 + lib/Target/AArch64/AArch64TargetObjectFile.h | 31 + .../AArch64/AsmParser/AArch64AsmParser.cpp | 2197 + lib/Target/AArch64/AsmParser/CMakeLists.txt | 7 + lib/Target/AArch64/AsmParser/LLVMBuild.txt | 24 + lib/Target/AArch64/AsmParser/Makefile | 15 + lib/Target/AArch64/CMakeLists.txt | 36 + .../Disassembler/AArch64Disassembler.cpp | 
803 + .../AArch64/Disassembler/CMakeLists.txt | 7 + lib/Target/AArch64/Disassembler/LLVMBuild.txt | 24 + lib/Target/AArch64/Disassembler/Makefile | 16 + .../InstPrinter/AArch64InstPrinter.cpp | 408 + .../AArch64/InstPrinter/AArch64InstPrinter.h | 172 + lib/Target/AArch64/InstPrinter/CMakeLists.txt | 8 + lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 24 + lib/Target/AArch64/InstPrinter/Makefile | 15 + lib/Target/AArch64/LLVMBuild.txt | 36 + .../MCTargetDesc/AArch64AsmBackend.cpp | 585 + .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 292 + .../MCTargetDesc/AArch64ELFStreamer.cpp | 160 + .../AArch64/MCTargetDesc/AArch64ELFStreamer.h | 27 + .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 113 + .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 41 + .../MCTargetDesc/AArch64MCAsmInfo.h} | 21 +- .../MCTargetDesc/AArch64MCCodeEmitter.cpp | 502 + .../AArch64/MCTargetDesc/AArch64MCExpr.cpp | 178 + .../AArch64/MCTargetDesc/AArch64MCExpr.h | 167 + .../MCTargetDesc/AArch64MCTargetDesc.cpp | 194 + .../MCTargetDesc/AArch64MCTargetDesc.h | 65 + .../AArch64/MCTargetDesc/CMakeLists.txt | 13 + lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 24 + .../MCTargetDesc/Makefile | 4 +- lib/Target/AArch64/Makefile | 30 + lib/Target/AArch64/README.txt | 2 + .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 24 + lib/Target/AArch64/TargetInfo/CMakeLists.txt | 7 + .../TargetInfo/LLVMBuild.txt | 9 +- .../{CellSPU => AArch64}/TargetInfo/Makefile | 4 +- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 1103 + lib/Target/AArch64/Utils/AArch64BaseInfo.h | 1068 + lib/Target/AArch64/Utils/CMakeLists.txt | 5 + lib/Target/AArch64/Utils/LLVMBuild.txt | 23 + lib/Target/AArch64/Utils/Makefile | 15 + lib/Target/ARM/A15SDOptimizer.cpp | 704 + lib/Target/ARM/ARM.h | 4 + lib/Target/ARM/ARM.td | 35 +- lib/Target/ARM/ARMAsmPrinter.cpp | 701 +- lib/Target/ARM/ARMAsmPrinter.h | 10 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 71 +- lib/Target/ARM/ARMBaseInstrInfo.h | 8 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 431 +- 
lib/Target/ARM/ARMBaseRegisterInfo.h | 23 +- lib/Target/ARM/ARMCallingConv.h | 2 +- lib/Target/ARM/ARMCodeEmitter.cpp | 26 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 18 +- lib/Target/ARM/ARMConstantPoolValue.cpp | 29 +- lib/Target/ARM/ARMConstantPoolValue.h | 6 +- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4 +- lib/Target/ARM/ARMFastISel.cpp | 297 +- lib/Target/ARM/ARMFrameLowering.cpp | 126 +- lib/Target/ARM/ARMFrameLowering.h | 5 + lib/Target/ARM/ARMISelDAGToDAG.cpp | 445 +- lib/Target/ARM/ARMISelLowering.cpp | 701 +- lib/Target/ARM/ARMISelLowering.h | 38 +- lib/Target/ARM/ARMInstrInfo.cpp | 4 +- lib/Target/ARM/ARMInstrInfo.td | 169 +- lib/Target/ARM/ARMInstrNEON.td | 24 +- lib/Target/ARM/ARMInstrThumb2.td | 26 +- lib/Target/ARM/ARMJITInfo.cpp | 4 +- lib/Target/ARM/ARMJITInfo.h | 4 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 200 +- lib/Target/ARM/ARMMCInstLower.cpp | 2 +- lib/Target/ARM/ARMMachineFunctionInfo.h | 6 +- lib/Target/ARM/ARMSchedule.td | 71 + lib/Target/ARM/ARMScheduleA9.td | 56 +- lib/Target/ARM/ARMScheduleSwift.td | 61 +- lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2 +- lib/Target/ARM/ARMSubtarget.cpp | 126 +- lib/Target/ARM/ARMSubtarget.h | 34 +- lib/Target/ARM/ARMTargetMachine.cpp | 32 +- lib/Target/ARM/ARMTargetMachine.h | 38 +- lib/Target/ARM/ARMTargetObjectFile.cpp | 15 +- lib/Target/ARM/ARMTargetObjectFile.h | 5 + lib/Target/ARM/ARMTargetTransformInfo.cpp | 458 + lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 138 - lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 657 +- lib/Target/ARM/AsmParser/CMakeLists.txt | 1 - lib/Target/ARM/CMakeLists.txt | 3 +- .../ARM/Disassembler/ARMDisassembler.cpp | 42 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 59 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + lib/Target/ARM/LICENSE.TXT | 47 + lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 153 +- .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 23 +- .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 418 + lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h | 27 + 
.../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 23 +- lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 3 + .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 20 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 5 +- lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h | 112 + lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 1 + lib/Target/ARM/MLxExpansionPass.cpp | 10 +- lib/Target/ARM/Makefile | 2 +- lib/Target/ARM/README-Thumb.txt | 2 - lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 2 +- lib/Target/ARM/Thumb1FrameLowering.cpp | 48 +- lib/Target/ARM/Thumb1FrameLowering.h | 4 + lib/Target/ARM/Thumb1InstrInfo.cpp | 2 +- lib/Target/ARM/Thumb1RegisterInfo.cpp | 91 +- lib/Target/ARM/Thumb1RegisterInfo.h | 8 +- lib/Target/ARM/Thumb2ITBlockPass.cpp | 6 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 6 +- lib/Target/ARM/Thumb2RegisterInfo.cpp | 6 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 377 +- lib/Target/CMakeLists.txt | 3 - lib/Target/CellSPU/CMakeLists.txt | 30 - lib/Target/CellSPU/CellSDKIntrinsics.td | 449 - .../CellSPU/MCTargetDesc/CMakeLists.txt | 6 - .../CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp | 43 - .../CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 94 - .../CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 38 - lib/Target/CellSPU/Makefile | 20 - lib/Target/CellSPU/README.txt | 106 - lib/Target/CellSPU/SPU.h | 31 - lib/Target/CellSPU/SPU.td | 66 - lib/Target/CellSPU/SPU128InstrInfo.td | 41 - lib/Target/CellSPU/SPU64InstrInfo.td | 408 - lib/Target/CellSPU/SPUAsmPrinter.cpp | 333 - lib/Target/CellSPU/SPUCallingConv.td | 53 - lib/Target/CellSPU/SPUFrameLowering.cpp | 256 - lib/Target/CellSPU/SPUFrameLowering.h | 80 - lib/Target/CellSPU/SPUHazardRecognizers.cpp | 135 - lib/Target/CellSPU/SPUHazardRecognizers.h | 37 - lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1192 - lib/Target/CellSPU/SPUISelLowering.cpp | 3266 - lib/Target/CellSPU/SPUISelLowering.h | 178 - lib/Target/CellSPU/SPUInstrBuilder.h | 43 - lib/Target/CellSPU/SPUInstrFormats.td | 320 - 
lib/Target/CellSPU/SPUInstrInfo.cpp | 449 - lib/Target/CellSPU/SPUInstrInfo.h | 84 - lib/Target/CellSPU/SPUInstrInfo.td | 4484 -- lib/Target/CellSPU/SPUMachineFunction.h | 50 - lib/Target/CellSPU/SPUMathInstr.td | 97 - lib/Target/CellSPU/SPUNodes.td | 159 - lib/Target/CellSPU/SPUNopFiller.cpp | 153 - lib/Target/CellSPU/SPUOperands.td | 664 - lib/Target/CellSPU/SPURegisterInfo.cpp | 357 - lib/Target/CellSPU/SPURegisterInfo.h | 106 - lib/Target/CellSPU/SPURegisterInfo.td | 183 - lib/Target/CellSPU/SPURegisterNames.h | 19 - lib/Target/CellSPU/SPUSchedule.td | 59 - lib/Target/CellSPU/SPUSubtarget.cpp | 65 - lib/Target/CellSPU/SPUSubtarget.h | 97 - lib/Target/CellSPU/SPUTargetMachine.cpp | 94 - lib/Target/CellSPU/SPUTargetMachine.h | 96 - lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 7 - .../CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20 - lib/Target/CppBackend/CPPBackend.cpp | 109 +- lib/Target/CppBackend/CPPTargetMachine.h | 2 +- .../TargetInfo/CppBackendTargetInfo.cpp | 2 +- lib/Target/Hexagon/CMakeLists.txt | 5 +- lib/Target/Hexagon/Hexagon.h | 8 +- lib/Target/Hexagon/Hexagon.td | 101 + lib/Target/Hexagon/HexagonAsmPrinter.cpp | 36 +- lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 11 +- .../Hexagon/HexagonCallingConvLower.cpp | 6 +- lib/Target/Hexagon/HexagonCallingConvLower.h | 4 +- .../Hexagon/HexagonExpandPredSpillCode.cpp | 11 +- lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 183 + lib/Target/Hexagon/HexagonFrameLowering.cpp | 32 +- lib/Target/Hexagon/HexagonFrameLowering.h | 5 + lib/Target/Hexagon/HexagonHardwareLoops.cpp | 1653 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 176 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 107 +- lib/Target/Hexagon/HexagonISelLowering.h | 17 +- lib/Target/Hexagon/HexagonImmediates.td | 508 - lib/Target/Hexagon/HexagonInstrFormats.td | 445 +- lib/Target/Hexagon/HexagonInstrFormatsV4.td | 65 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 1284 +- lib/Target/Hexagon/HexagonInstrInfo.h | 27 +- lib/Target/Hexagon/HexagonInstrInfo.td | 
2394 +- lib/Target/Hexagon/HexagonInstrInfoV4.td | 6162 +- lib/Target/Hexagon/HexagonMCInst.h | 41 - lib/Target/Hexagon/HexagonMCInstLower.cpp | 6 +- .../Hexagon/HexagonMachineScheduler.cpp | 19 +- lib/Target/Hexagon/HexagonMachineScheduler.h | 8 +- lib/Target/Hexagon/HexagonNewValueJump.cpp | 42 +- lib/Target/Hexagon/HexagonOperands.td | 858 + lib/Target/Hexagon/HexagonPeephole.cpp | 8 +- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 159 +- lib/Target/Hexagon/HexagonRegisterInfo.h | 14 +- lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 9 +- lib/Target/Hexagon/HexagonSchedule.td | 24 +- lib/Target/Hexagon/HexagonScheduleV4.td | 14 +- .../Hexagon/HexagonSplitTFRCondSets.cpp | 13 +- lib/Target/Hexagon/HexagonSubtarget.cpp | 17 +- lib/Target/Hexagon/HexagonSubtarget.h | 2 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 60 +- lib/Target/Hexagon/HexagonTargetMachine.h | 21 +- .../Hexagon/HexagonTargetObjectFile.cpp | 10 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 598 +- .../InstPrinter/HexagonInstPrinter.cpp | 42 +- .../Hexagon/InstPrinter/HexagonInstPrinter.h | 17 +- lib/Target/Hexagon/InstPrinter/LLVMBuild.txt | 2 +- .../Hexagon/MCTargetDesc/CMakeLists.txt | 3 +- .../Hexagon/MCTargetDesc/HexagonBaseInfo.h | 141 +- .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1 + .../Hexagon/MCTargetDesc/HexagonMCInst.cpp | 175 + .../Hexagon/MCTargetDesc/HexagonMCInst.h | 100 + .../MCTargetDesc/HexagonMCTargetDesc.cpp | 2 + .../Hexagon/TargetInfo/HexagonTargetInfo.cpp | 2 +- lib/Target/LLVMBuild.txt | 2 +- lib/Target/MBlaze/AsmParser/CMakeLists.txt | 1 - .../MBlaze/AsmParser/MBlazeAsmLexer.cpp | 115 - .../MBlaze/AsmParser/MBlazeAsmParser.cpp | 111 +- lib/Target/MBlaze/CMakeLists.txt | 1 - .../Disassembler/MBlazeDisassembler.cpp | 9 +- .../MBlaze/Disassembler/MBlazeDisassembler.h | 5 - .../MBlaze/InstPrinter/MBlazeInstPrinter.cpp | 4 +- lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 30 +- lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 4 +- lib/Target/MBlaze/MBlazeFrameLowering.cpp | 
47 +- lib/Target/MBlaze/MBlazeFrameLowering.h | 4 + lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 14 +- lib/Target/MBlaze/MBlazeISelLowering.cpp | 48 +- lib/Target/MBlaze/MBlazeISelLowering.h | 2 +- lib/Target/MBlaze/MBlazeInstrInfo.cpp | 4 +- lib/Target/MBlaze/MBlazeInstrInfo.td | 4 +- lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 14 +- lib/Target/MBlaze/MBlazeMCInstLower.cpp | 8 +- lib/Target/MBlaze/MBlazeMachineFunction.h | 2 +- lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 86 +- lib/Target/MBlaze/MBlazeRegisterInfo.h | 10 +- lib/Target/MBlaze/MBlazeSubtarget.h | 2 +- lib/Target/MBlaze/MBlazeTargetMachine.cpp | 5 +- lib/Target/MBlaze/MBlazeTargetMachine.h | 22 +- lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 8 +- .../MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp | 8 +- .../MCTargetDesc/MBlazeMCCodeEmitter.cpp | 6 +- .../MCTargetDesc/MBlazeMCTargetDesc.cpp | 2 +- lib/Target/MBlaze/Makefile | 3 +- .../MBlaze/TargetInfo/MBlazeTargetInfo.cpp | 2 +- .../MSP430/InstPrinter/MSP430InstPrinter.cpp | 4 +- .../MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 2 +- .../MCTargetDesc/MSP430MCTargetDesc.cpp | 2 +- lib/Target/MSP430/MSP430AsmPrinter.cpp | 14 +- lib/Target/MSP430/MSP430BranchSelector.cpp | 6 +- lib/Target/MSP430/MSP430CallingConv.td | 3 + lib/Target/MSP430/MSP430FrameLowering.cpp | 76 +- lib/Target/MSP430/MSP430FrameLowering.h | 7 +- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 12 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 129 +- lib/Target/MSP430/MSP430ISelLowering.h | 4 +- lib/Target/MSP430/MSP430InstrInfo.cpp | 2 +- lib/Target/MSP430/MSP430InstrInfo.td | 4 +- lib/Target/MSP430/MSP430MCInstLower.cpp | 6 +- lib/Target/MSP430/MSP430MachineFunctionInfo.h | 6 + lib/Target/MSP430/MSP430RegisterInfo.cpp | 85 +- lib/Target/MSP430/MSP430RegisterInfo.h | 7 +- lib/Target/MSP430/MSP430TargetMachine.cpp | 4 +- lib/Target/MSP430/MSP430TargetMachine.h | 17 +- .../MSP430/TargetInfo/MSP430TargetInfo.cpp | 2 +- lib/Target/Mangler.cpp | 10 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 848 +- 
lib/Target/Mips/CMakeLists.txt | 6 +- lib/Target/Mips/Disassembler/LLVMBuild.txt | 2 +- lib/Target/Mips/Disassembler/Makefile | 2 +- .../Mips/Disassembler/MipsDisassembler.cpp | 50 +- .../Mips/InstPrinter/MipsInstPrinter.cpp | 10 +- lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 3 + lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 2 + .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 3 +- lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 93 - .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 22 +- .../Mips/MCTargetDesc/MipsELFStreamer.cpp | 89 + .../Mips/MCTargetDesc/MipsELFStreamer.h | 43 + .../Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 7 +- .../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 29 +- .../Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 7 +- lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp | 80 + lib/Target/Mips/MCTargetDesc/MipsReginfo.h | 31 + lib/Target/Mips/Makefile | 2 +- lib/Target/Mips/Mips.h | 1 + lib/Target/Mips/Mips.td | 16 +- lib/Target/Mips/Mips16FrameLowering.cpp | 71 +- lib/Target/Mips/Mips16FrameLowering.h | 6 +- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 308 + lib/Target/Mips/Mips16ISelDAGToDAG.h | 51 + lib/Target/Mips/Mips16ISelLowering.cpp | 689 + lib/Target/Mips/Mips16ISelLowering.h | 80 + lib/Target/Mips/Mips16InstrFormats.td | 111 +- lib/Target/Mips/Mips16InstrInfo.cpp | 268 +- lib/Target/Mips/Mips16InstrInfo.h | 66 +- lib/Target/Mips/Mips16InstrInfo.td | 443 +- lib/Target/Mips/Mips16RegisterInfo.cpp | 86 +- lib/Target/Mips/Mips16RegisterInfo.h | 20 +- lib/Target/Mips/Mips64InstrInfo.td | 372 +- lib/Target/Mips/MipsAsmPrinter.cpp | 77 +- lib/Target/Mips/MipsAsmPrinter.h | 3 +- lib/Target/Mips/MipsCallingConv.td | 18 +- lib/Target/Mips/MipsCodeEmitter.cpp | 180 +- lib/Target/Mips/MipsCondMov.td | 181 +- lib/Target/Mips/MipsConstantIslandPass.cpp | 85 + lib/Target/Mips/MipsDSPInstrFormats.td | 5 +- lib/Target/Mips/MipsDSPInstrInfo.td | 280 +- lib/Target/Mips/MipsDelaySlotFiller.cpp | 776 +- lib/Target/Mips/MipsFrameLowering.cpp | 8 +- lib/Target/Mips/MipsFrameLowering.h 
| 7 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 692 +- lib/Target/Mips/MipsISelDAGToDAG.h | 93 + lib/Target/Mips/MipsISelLowering.cpp | 1714 +- lib/Target/Mips/MipsISelLowering.h | 207 +- lib/Target/Mips/MipsInstrFPU.td | 490 +- lib/Target/Mips/MipsInstrFormats.td | 554 +- lib/Target/Mips/MipsInstrInfo.cpp | 170 +- lib/Target/Mips/MipsInstrInfo.h | 45 + lib/Target/Mips/MipsInstrInfo.td | 1141 +- lib/Target/Mips/MipsJITInfo.cpp | 4 +- lib/Target/Mips/MipsLongBranch.cpp | 54 +- lib/Target/Mips/MipsMCInstLower.cpp | 2 +- lib/Target/Mips/MipsMachineFunction.cpp | 20 +- lib/Target/Mips/MipsMachineFunction.h | 18 +- lib/Target/Mips/MipsRegisterInfo.cpp | 61 +- lib/Target/Mips/MipsRegisterInfo.h | 11 +- lib/Target/Mips/MipsRegisterInfo.td | 140 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 260 +- lib/Target/Mips/MipsSEFrameLowering.h | 7 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 473 + lib/Target/Mips/MipsSEISelDAGToDAG.h | 57 + lib/Target/Mips/MipsSEISelLowering.cpp | 442 + lib/Target/Mips/MipsSEISelLowering.h | 62 + lib/Target/Mips/MipsSEInstrInfo.cpp | 87 +- lib/Target/Mips/MipsSEInstrInfo.h | 24 +- lib/Target/Mips/MipsSERegisterInfo.cpp | 59 +- lib/Target/Mips/MipsSERegisterInfo.h | 4 +- lib/Target/Mips/MipsSubtarget.cpp | 9 +- lib/Target/Mips/MipsSubtarget.h | 38 +- lib/Target/Mips/MipsTargetMachine.cpp | 17 +- lib/Target/Mips/MipsTargetMachine.h | 31 +- lib/Target/Mips/MipsTargetObjectFile.cpp | 22 +- lib/Target/Mips/MipsTargetObjectFile.h | 2 + lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 2 +- lib/Target/NVPTX/CMakeLists.txt | 2 +- lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 38 +- .../NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 12 +- .../NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 11 +- lib/Target/NVPTX/ManagedStringPool.h | 1 - lib/Target/NVPTX/NVPTX.h | 34 +- lib/Target/NVPTX/NVPTX.td | 12 - lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 22 +- lib/Target/NVPTX/NVPTXAllocaHoisting.h | 2 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 1000 +- 
lib/Target/NVPTX/NVPTXAsmPrinter.h | 127 +- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 54 +- lib/Target/NVPTX/NVPTXFrameLowering.h | 12 +- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1642 +- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 17 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 1260 +- lib/Target/NVPTX/NVPTXISelLowering.h | 47 +- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 143 +- lib/Target/NVPTX/NVPTXInstrInfo.h | 31 +- lib/Target/NVPTX/NVPTXInstrInfo.td | 96 +- lib/Target/NVPTX/NVPTXIntrinsics.td | 145 +- lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 71 +- lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 4 +- lib/Target/NVPTX/NVPTXNumRegisters.h | 6 +- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 264 +- lib/Target/NVPTX/NVPTXRegisterInfo.h | 31 +- lib/Target/NVPTX/NVPTXRegisterInfo.td | 44 - lib/Target/NVPTX/NVPTXSection.h | 4 +- lib/Target/NVPTX/NVPTXSplitBBatBar.cpp | 20 +- lib/Target/NVPTX/NVPTXSplitBBatBar.h | 2 +- lib/Target/NVPTX/NVPTXSubtarget.cpp | 22 +- lib/Target/NVPTX/NVPTXSubtarget.h | 14 +- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 85 +- lib/Target/NVPTX/NVPTXTargetMachine.h | 59 +- lib/Target/NVPTX/NVPTXTargetObjectFile.h | 77 +- lib/Target/NVPTX/NVPTXUtilities.cpp | 118 +- lib/Target/NVPTX/NVPTXUtilities.h | 18 +- lib/Target/NVPTX/NVPTXutil.cpp | 32 +- lib/Target/NVPTX/NVVMReflect.cpp | 177 + .../NVPTX/TargetInfo/NVPTXTargetInfo.cpp | 6 +- lib/Target/NVPTX/VectorElementize.cpp | 1248 - lib/Target/NVPTX/cl_common_defines.h | 113 +- lib/Target/NVPTX/gen-register-defs.py | 202 - lib/Target/PowerPC/CMakeLists.txt | 1 + .../PowerPC/InstPrinter/PPCInstPrinter.cpp | 30 +- .../PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 56 +- lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h | 70 - .../MCTargetDesc/PPCELFObjectWriter.cpp | 124 +- .../PowerPC/MCTargetDesc/PPCFixupKinds.h | 21 +- .../PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 10 +- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 75 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 9 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 7 + 
.../PowerPC/MCTargetDesc/PPCPredicates.cpp | 2 +- .../PowerPC/MCTargetDesc/PPCPredicates.h | 8 +- lib/Target/PowerPC/PPC.h | 26 +- lib/Target/PowerPC/PPC.td | 149 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 525 +- lib/Target/PowerPC/PPCBranchSelector.cpp | 17 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 103 +- lib/Target/PowerPC/PPCCallingConv.td | 68 +- lib/Target/PowerPC/PPCCodeEmitter.cpp | 20 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 250 +- lib/Target/PowerPC/PPCFrameLowering.h | 21 +- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 333 +- lib/Target/PowerPC/PPCISelLowering.cpp | 1260 +- lib/Target/PowerPC/PPCISelLowering.h | 191 +- lib/Target/PowerPC/PPCInstr64Bit.td | 678 +- lib/Target/PowerPC/PPCInstrAltivec.td | 623 +- lib/Target/PowerPC/PPCInstrFormats.td | 22 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 268 +- lib/Target/PowerPC/PPCInstrInfo.h | 6 +- lib/Target/PowerPC/PPCInstrInfo.td | 932 +- lib/Target/PowerPC/PPCJITInfo.cpp | 19 +- lib/Target/PowerPC/PPCJITInfo.h | 2 +- lib/Target/PowerPC/PPCMCInstLower.cpp | 9 +- lib/Target/PowerPC/PPCMachineFunctionInfo.h | 31 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 414 +- lib/Target/PowerPC/PPCRegisterInfo.h | 42 +- lib/Target/PowerPC/PPCRegisterInfo.td | 176 +- lib/Target/PowerPC/PPCScheduleA2.td | 15 + lib/Target/PowerPC/PPCScheduleG5.td | 15 + lib/Target/PowerPC/PPCSubtarget.cpp | 23 +- lib/Target/PowerPC/PPCSubtarget.h | 30 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 18 +- lib/Target/PowerPC/PPCTargetMachine.h | 20 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 240 + lib/Target/PowerPC/README.txt | 21 - .../PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 2 +- lib/Target/R600/AMDGPU.h | 51 + lib/Target/R600/AMDGPU.td | 41 + lib/Target/R600/AMDGPUAsmPrinter.cpp | 145 + lib/Target/R600/AMDGPUAsmPrinter.h | 44 + lib/Target/R600/AMDGPUCallingConv.td | 42 + lib/Target/R600/AMDGPUConvertToISA.cpp | 62 + lib/Target/R600/AMDGPUFrameLowering.cpp | 122 + 
lib/Target/R600/AMDGPUFrameLowering.h | 44 + lib/Target/R600/AMDGPUISelLowering.cpp | 414 + lib/Target/R600/AMDGPUISelLowering.h | 140 + lib/Target/R600/AMDGPUIndirectAddressing.cpp | 343 + lib/Target/R600/AMDGPUInstrInfo.cpp | 267 + lib/Target/R600/AMDGPUInstrInfo.h | 206 + lib/Target/R600/AMDGPUInstrInfo.td | 82 + lib/Target/R600/AMDGPUInstructions.td | 266 + lib/Target/R600/AMDGPUIntrinsics.td | 60 + lib/Target/R600/AMDGPUMCInstLower.cpp | 83 + lib/Target/R600/AMDGPUMCInstLower.h | 34 + lib/Target/R600/AMDGPUMachineFunction.cpp | 22 + lib/Target/R600/AMDGPUMachineFunction.h | 29 + lib/Target/R600/AMDGPURegisterInfo.cpp | 75 + lib/Target/R600/AMDGPURegisterInfo.h | 66 + lib/Target/R600/AMDGPURegisterInfo.td | 25 + lib/Target/R600/AMDGPUStructurizeCFG.cpp | 896 + lib/Target/R600/AMDGPUSubtarget.cpp | 87 + lib/Target/R600/AMDGPUSubtarget.h | 65 + lib/Target/R600/AMDGPUTargetMachine.cpp | 164 + lib/Target/R600/AMDGPUTargetMachine.h | 70 + lib/Target/R600/AMDIL.h | 121 + lib/Target/R600/AMDIL7XXDevice.cpp | 115 + lib/Target/R600/AMDIL7XXDevice.h | 72 + lib/Target/R600/AMDILBase.td | 85 + lib/Target/R600/AMDILCFGStructurizer.cpp | 3051 + lib/Target/R600/AMDILDevice.cpp | 132 + lib/Target/R600/AMDILDevice.h | 117 + lib/Target/R600/AMDILDeviceInfo.cpp | 94 + lib/Target/R600/AMDILDeviceInfo.h | 88 + lib/Target/R600/AMDILDevices.h | 19 + lib/Target/R600/AMDILEvergreenDevice.cpp | 169 + lib/Target/R600/AMDILEvergreenDevice.h | 93 + lib/Target/R600/AMDILISelDAGToDAG.cpp | 643 + lib/Target/R600/AMDILISelLowering.cpp | 647 + lib/Target/R600/AMDILInstrInfo.td | 207 + lib/Target/R600/AMDILIntrinsicInfo.cpp | 79 + lib/Target/R600/AMDILIntrinsicInfo.h | 49 + lib/Target/R600/AMDILIntrinsics.td | 232 + lib/Target/R600/AMDILNIDevice.cpp | 65 + lib/Target/R600/AMDILNIDevice.h | 57 + lib/Target/R600/AMDILPeepholeOptimizer.cpp | 1215 + lib/Target/R600/AMDILRegisterInfo.td | 107 + lib/Target/R600/AMDILSIDevice.cpp | 48 + lib/Target/R600/AMDILSIDevice.h | 39 + 
lib/Target/R600/CMakeLists.txt | 59 + .../R600/InstPrinter/AMDGPUInstPrinter.cpp | 172 + .../R600/InstPrinter/AMDGPUInstPrinter.h | 54 + lib/Target/R600/InstPrinter/CMakeLists.txt | 7 + lib/Target/R600/InstPrinter/LLVMBuild.txt | 24 + lib/Target/R600/InstPrinter/Makefile | 15 + lib/Target/{CellSPU => R600}/LLVMBuild.txt | 14 +- .../R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 90 + .../R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 83 + .../R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 30 + .../R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 40 + .../R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 113 + .../R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 55 + lib/Target/R600/MCTargetDesc/CMakeLists.txt | 10 + lib/Target/R600/MCTargetDesc/LLVMBuild.txt | 23 + lib/Target/R600/MCTargetDesc/Makefile | 16 + .../R600/MCTargetDesc/R600MCCodeEmitter.cpp | 585 + .../R600/MCTargetDesc/SIMCCodeEmitter.cpp | 201 + lib/Target/R600/Makefile | 23 + lib/Target/R600/Processors.td | 30 + lib/Target/R600/R600ControlFlowFinalizer.cpp | 268 + lib/Target/R600/R600Defines.h | 97 + lib/Target/R600/R600EmitClauseMarkers.cpp | 255 + lib/Target/R600/R600ExpandSpecialInstrs.cpp | 297 + lib/Target/R600/R600ISelLowering.cpp | 1106 + lib/Target/R600/R600ISelLowering.h | 74 + lib/Target/R600/R600InstrInfo.cpp | 841 + lib/Target/R600/R600InstrInfo.h | 204 + lib/Target/R600/R600Instructions.td | 2267 + lib/Target/R600/R600Intrinsics.td | 31 + .../R600/R600MachineFunctionInfo.cpp} | 14 +- lib/Target/R600/R600MachineFunctionInfo.h | 32 + lib/Target/R600/R600MachineScheduler.cpp | 427 + lib/Target/R600/R600MachineScheduler.h | 120 + lib/Target/R600/R600RegisterInfo.cpp | 99 + lib/Target/R600/R600RegisterInfo.h | 55 + lib/Target/R600/R600RegisterInfo.td | 209 + lib/Target/R600/R600Schedule.td | 36 + lib/Target/R600/SIAnnotateControlFlow.cpp | 329 + lib/Target/R600/SIISelLowering.cpp | 670 + lib/Target/R600/SIISelLowering.h | 58 + lib/Target/R600/SIInsertWaits.cpp | 358 + lib/Target/R600/SIInstrFormats.td | 426 + 
lib/Target/R600/SIInstrInfo.cpp | 264 + lib/Target/R600/SIInstrInfo.h | 97 + lib/Target/R600/SIInstrInfo.td | 356 + lib/Target/R600/SIInstructions.td | 1607 + lib/Target/R600/SIIntrinsics.td | 42 + lib/Target/R600/SILowerControlFlow.cpp | 501 + lib/Target/R600/SIMachineFunctionInfo.cpp | 18 + lib/Target/R600/SIMachineFunctionInfo.h | 33 + lib/Target/R600/SIRegisterInfo.cpp | 53 + lib/Target/R600/SIRegisterInfo.h | 50 + lib/Target/R600/SIRegisterInfo.td | 182 + lib/Target/R600/SISchedule.td | 15 + .../R600/TargetInfo/AMDGPUTargetInfo.cpp | 26 + lib/Target/R600/TargetInfo/CMakeLists.txt | 7 + lib/Target/R600/TargetInfo/LLVMBuild.txt | 23 + lib/Target/R600/TargetInfo/Makefile | 15 + lib/Target/README.txt | 15 - lib/Target/Sparc/DelaySlotFiller.cpp | 6 +- lib/Target/Sparc/FPMover.cpp | 6 +- .../Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 5 +- lib/Target/Sparc/SparcAsmPrinter.cpp | 4 +- lib/Target/Sparc/SparcCallingConv.td | 20 + lib/Target/Sparc/SparcFrameLowering.cpp | 22 +- lib/Target/Sparc/SparcFrameLowering.h | 4 + lib/Target/Sparc/SparcISelDAGToDAG.cpp | 2 +- lib/Target/Sparc/SparcISelLowering.cpp | 164 +- lib/Target/Sparc/SparcISelLowering.h | 19 +- lib/Target/Sparc/SparcInstr64Bit.td | 285 + lib/Target/Sparc/SparcInstrFormats.td | 37 + lib/Target/Sparc/SparcInstrInfo.cpp | 4 +- lib/Target/Sparc/SparcInstrInfo.td | 199 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 48 +- lib/Target/Sparc/SparcRegisterInfo.h | 11 +- lib/Target/Sparc/SparcRegisterInfo.td | 14 +- lib/Target/Sparc/SparcTargetMachine.cpp | 4 +- lib/Target/Sparc/SparcTargetMachine.h | 17 +- .../Sparc/TargetInfo/SparcTargetInfo.cpp | 2 +- lib/Target/Target.cpp | 5 +- lib/Target/TargetInstrInfo.cpp | 88 - lib/Target/TargetIntrinsicInfo.cpp | 2 +- lib/Target/TargetLibraryInfo.cpp | 283 +- lib/Target/TargetLoweringObjectFile.cpp | 40 +- lib/Target/TargetMachine.cpp | 34 +- lib/Target/TargetMachineC.cpp | 16 +- lib/Target/TargetSubtargetInfo.cpp | 4 + lib/Target/TargetTransformImpl.cpp | 353 - 
lib/Target/X86/AsmParser/CMakeLists.txt | 1 - lib/Target/X86/AsmParser/X86AsmLexer.cpp | 159 - lib/Target/X86/AsmParser/X86AsmParser.cpp | 1131 +- lib/Target/X86/CMakeLists.txt | 3 +- .../X86/Disassembler/X86Disassembler.cpp | 9 +- lib/Target/X86/Disassembler/X86Disassembler.h | 4 - .../X86/Disassembler/X86DisassemblerDecoder.c | 347 +- .../X86/InstPrinter/X86ATTInstPrinter.cpp | 12 +- .../X86/InstPrinter/X86InstComments.cpp | 135 +- .../X86/InstPrinter/X86IntelInstPrinter.cpp | 4 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 34 +- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 27 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 14 +- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 45 +- .../X86/MCTargetDesc/X86MCTargetDesc.cpp | 11 +- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 8 +- lib/Target/X86/Makefile | 3 +- lib/Target/X86/README-SSE.txt | 9 + lib/Target/X86/README.txt | 37 - lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 2 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 18 + lib/Target/X86/Utils/X86ShuffleDecode.h | 2 + lib/Target/X86/X86.h | 11 +- lib/Target/X86/X86.td | 101 +- lib/Target/X86/X86AsmPrinter.cpp | 73 +- lib/Target/X86/X86AsmPrinter.h | 8 +- lib/Target/X86/X86COFFMachineModuleInfo.h | 2 +- lib/Target/X86/X86CallingConv.td | 102 +- lib/Target/X86/X86CodeEmitter.cpp | 36 +- .../X86/X86CompilationCallback_Win64.asm | 4 +- lib/Target/X86/X86FastISel.cpp | 169 +- lib/Target/X86/X86FloatingPoint.cpp | 12 +- lib/Target/X86/X86FrameLowering.cpp | 342 +- lib/Target/X86/X86FrameLowering.h | 6 + lib/Target/X86/X86ISelDAGToDAG.cpp | 38 +- lib/Target/X86/X86ISelLowering.cpp | 3426 +- lib/Target/X86/X86ISelLowering.h | 116 +- lib/Target/X86/X86Instr3DNow.td | 15 +- lib/Target/X86/X86InstrArithmetic.td | 567 +- lib/Target/X86/X86InstrCMovSetCC.td | 21 +- lib/Target/X86/X86InstrCompiler.td | 325 +- lib/Target/X86/X86InstrControl.td | 72 +- lib/Target/X86/X86InstrExtension.td | 73 +- lib/Target/X86/X86InstrFMA.td | 74 +- lib/Target/X86/X86InstrFPStack.td | 26 
+- lib/Target/X86/X86InstrFormats.td | 163 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 12 +- lib/Target/X86/X86InstrInfo.cpp | 746 +- lib/Target/X86/X86InstrInfo.td | 334 +- lib/Target/X86/X86InstrMMX.td | 80 +- lib/Target/X86/X86InstrSSE.td | 2570 +- lib/Target/X86/X86InstrShiftRotate.td | 58 +- lib/Target/X86/X86InstrSystem.td | 32 +- lib/Target/X86/X86InstrTSX.td | 9 +- lib/Target/X86/X86JITInfo.cpp | 36 +- lib/Target/X86/X86JITInfo.h | 2 +- lib/Target/X86/X86MCInstLower.cpp | 120 +- lib/Target/X86/X86PadShortFunction.cpp | 212 + lib/Target/X86/X86RegisterInfo.cpp | 296 +- lib/Target/X86/X86RegisterInfo.h | 7 +- lib/Target/X86/X86SchedHaswell.td | 126 + lib/Target/X86/X86SchedSandyBridge.td | 122 + lib/Target/X86/X86Schedule.td | 94 +- lib/Target/X86/X86ScheduleAtom.td | 2 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 10 +- lib/Target/X86/X86Subtarget.cpp | 157 +- lib/Target/X86/X86Subtarget.h | 61 +- lib/Target/X86/X86TargetMachine.cpp | 58 +- lib/Target/X86/X86TargetMachine.h | 32 +- lib/Target/X86/X86TargetObjectFile.cpp | 16 +- lib/Target/X86/X86TargetObjectFile.h | 8 +- lib/Target/X86/X86TargetTransformInfo.cpp | 495 + lib/Target/X86/X86VZeroUpper.cpp | 10 + lib/Target/XCore/CMakeLists.txt | 4 + lib/Target/XCore/Disassembler/CMakeLists.txt | 5 + lib/Target/XCore/Disassembler/LLVMBuild.txt | 23 + lib/Target/XCore/Disassembler/Makefile | 16 + .../XCore/Disassembler/XCoreDisassembler.cpp | 800 + lib/Target/XCore/InstPrinter/CMakeLists.txt | 7 + lib/Target/XCore/InstPrinter/LLVMBuild.txt | 23 + lib/Target/XCore/InstPrinter/Makefile | 16 + .../XCore/InstPrinter/XCoreInstPrinter.cpp | 97 + .../XCore/InstPrinter/XCoreInstPrinter.h | 44 + lib/Target/XCore/LLVMBuild.txt | 3 +- lib/Target/XCore/MCTargetDesc/LLVMBuild.txt | 2 +- .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 14 + lib/Target/XCore/Makefile | 6 +- .../XCore/TargetInfo/XCoreTargetInfo.cpp | 2 +- lib/Target/XCore/XCore.td | 6 + lib/Target/XCore/XCoreAsmPrinter.cpp | 90 +- 
lib/Target/XCore/XCoreFrameLowering.cpp | 71 +- lib/Target/XCore/XCoreFrameLowering.h | 4 + lib/Target/XCore/XCoreISelDAGToDAG.cpp | 27 +- lib/Target/XCore/XCoreISelLowering.cpp | 162 +- lib/Target/XCore/XCoreISelLowering.h | 6 +- lib/Target/XCore/XCoreInstrFormats.td | 269 +- lib/Target/XCore/XCoreInstrInfo.cpp | 8 +- lib/Target/XCore/XCoreInstrInfo.td | 1090 +- lib/Target/XCore/XCoreMCInstLower.cpp | 117 + lib/Target/XCore/XCoreMCInstLower.h | 42 + lib/Target/XCore/XCoreMachineFunctionInfo.h | 2 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 96 +- lib/Target/XCore/XCoreRegisterInfo.h | 7 +- lib/Target/XCore/XCoreRegisterInfo.td | 11 +- lib/Target/XCore/XCoreSubtarget.h | 2 +- lib/Target/XCore/XCoreTargetMachine.cpp | 6 +- lib/Target/XCore/XCoreTargetMachine.h | 15 +- lib/Target/XCore/XCoreTargetObjectFile.cpp | 2 +- lib/Transforms/CMakeLists.txt | 1 + lib/Transforms/Hello/Hello.cpp | 6 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 96 +- lib/Transforms/IPO/ConstantMerge.cpp | 10 +- .../IPO/DeadArgumentElimination.cpp | 136 +- lib/Transforms/IPO/ExtractGV.cpp | 16 +- lib/Transforms/IPO/FunctionAttrs.cpp | 784 +- lib/Transforms/IPO/GlobalDCE.cpp | 6 +- lib/Transforms/IPO/GlobalOpt.cpp | 246 +- lib/Transforms/IPO/IPConstantPropagation.cpp | 14 +- lib/Transforms/IPO/InlineAlways.cpp | 128 +- lib/Transforms/IPO/InlineSimple.cpp | 73 +- lib/Transforms/IPO/Inliner.cpp | 82 +- lib/Transforms/IPO/Internalize.cpp | 24 +- lib/Transforms/IPO/LLVMBuild.txt | 2 +- lib/Transforms/IPO/LoopExtractor.cpp | 8 +- lib/Transforms/IPO/MergeFunctions.cpp | 30 +- lib/Transforms/IPO/PartialInlining.cpp | 10 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 28 +- lib/Transforms/IPO/PruneEH.cpp | 28 +- lib/Transforms/IPO/StripDeadPrototypes.cpp | 4 +- lib/Transforms/IPO/StripSymbols.cpp | 18 +- lib/Transforms/InstCombine/InstCombine.h | 71 +- .../InstCombine/InstCombineAddSub.cpp | 948 +- .../InstCombine/InstCombineAndOrXor.cpp | 410 +- .../InstCombine/InstCombineCalls.cpp | 160 +- 
.../InstCombine/InstCombineCasts.cpp | 388 +- .../InstCombine/InstCombineCompares.cpp | 137 +- .../InstCombineLoadStoreAlloca.cpp | 39 +- .../InstCombine/InstCombineMulDivRem.cpp | 421 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 6 +- .../InstCombine/InstCombineSelect.cpp | 9 +- .../InstCombine/InstCombineShifts.cpp | 173 +- .../InstCombineSimplifyDemanded.cpp | 354 +- .../InstCombine/InstCombineVectorOps.cpp | 23 +- .../InstCombine/InstCombineWorklist.h | 38 +- .../InstCombine/InstructionCombining.cpp | 134 +- .../Instrumentation/AddressSanitizer.cpp | 1106 +- lib/Transforms/Instrumentation/BlackList.cpp | 58 +- .../Instrumentation/BoundsChecking.cpp | 19 +- lib/Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/EdgeProfiling.cpp | 6 +- .../Instrumentation/GCOVProfiling.cpp | 301 +- .../Instrumentation/Instrumentation.cpp | 2 + .../Instrumentation/MaximumSpanningTree.h | 4 +- .../Instrumentation/MemorySanitizer.cpp | 1985 + .../Instrumentation/OptimalEdgeProfiling.cpp | 16 +- .../Instrumentation/PathProfiling.cpp | 19 +- .../Instrumentation/ProfilingUtils.cpp | 10 +- .../Instrumentation/ThreadSanitizer.cpp | 160 +- lib/Transforms/LLVMBuild.txt | 2 +- lib/Transforms/Makefile | 2 +- lib/Transforms/ObjCARC/CMakeLists.txt | 13 + lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 262 + lib/Transforms/ObjCARC/DependencyAnalysis.h | 79 + lib/Transforms/ObjCARC/LLVMBuild.txt | 23 + lib/Transforms/ObjCARC/Makefile | 15 + lib/Transforms/ObjCARC/ObjCARC.cpp | 48 + lib/Transforms/ObjCARC/ObjCARC.h | 395 + lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 175 + .../ObjCARC/ObjCARCAliasAnalysis.cpp | 162 + lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 74 + lib/Transforms/ObjCARC/ObjCARCContract.cpp | 541 + lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 128 + lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 3026 + lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 252 + lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 177 + lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 80 + 
lib/Transforms/Scalar/ADCE.cpp | 12 +- lib/Transforms/Scalar/BasicBlockPlacement.cpp | 6 +- lib/Transforms/Scalar/CMakeLists.txt | 1 - lib/Transforms/Scalar/CodeGenPrepare.cpp | 698 +- lib/Transforms/Scalar/ConstantProp.cpp | 14 +- .../Scalar/CorrelatedValuePropagation.cpp | 39 +- lib/Transforms/Scalar/DCE.cpp | 6 +- .../Scalar/DeadStoreElimination.cpp | 28 +- lib/Transforms/Scalar/EarlyCSE.cpp | 18 +- lib/Transforms/Scalar/GVN.cpp | 79 +- lib/Transforms/Scalar/GlobalMerge.cpp | 137 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 67 +- lib/Transforms/Scalar/JumpThreading.cpp | 49 +- lib/Transforms/Scalar/LICM.cpp | 65 +- lib/Transforms/Scalar/LoopDeletion.cpp | 62 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 581 +- lib/Transforms/Scalar/LoopInstSimplify.cpp | 9 +- lib/Transforms/Scalar/LoopRotation.cpp | 31 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 643 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 31 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 43 +- lib/Transforms/Scalar/LowerAtomic.cpp | 6 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 10 +- lib/Transforms/Scalar/ObjCARC.cpp | 4232 - lib/Transforms/Scalar/Reassociate.cpp | 358 +- lib/Transforms/Scalar/Reg2Mem.cpp | 14 +- lib/Transforms/Scalar/SCCP.cpp | 40 +- lib/Transforms/Scalar/SROA.cpp | 1503 +- lib/Transforms/Scalar/Scalar.cpp | 15 +- .../Scalar/ScalarReplAggregates.cpp | 28 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 53 +- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 1481 +- lib/Transforms/Scalar/Sink.cpp | 6 +- .../Scalar/TailRecursionElimination.cpp | 44 +- lib/Transforms/Utils/AddrModeMatcher.cpp | 577 - lib/Transforms/Utils/BasicBlockUtils.cpp | 92 +- lib/Transforms/Utils/BreakCriticalEdges.cpp | 12 +- lib/Transforms/Utils/BuildLibCalls.cpp | 174 +- lib/Transforms/Utils/BypassSlowDivision.cpp | 12 +- lib/Transforms/Utils/CMakeLists.txt | 1 - lib/Transforms/Utils/CloneFunction.cpp | 45 +- lib/Transforms/Utils/CloneModule.cpp | 10 +- lib/Transforms/Utils/CmpInstAnalysis.cpp | 4 +- 
lib/Transforms/Utils/CodeExtractor.cpp | 21 +- lib/Transforms/Utils/DemoteRegToStack.cpp | 33 +- lib/Transforms/Utils/InlineFunction.cpp | 85 +- lib/Transforms/Utils/InstructionNamer.cpp | 4 +- lib/Transforms/Utils/IntegerDivision.cpp | 112 +- lib/Transforms/Utils/LCSSA.cpp | 14 +- lib/Transforms/Utils/Local.cpp | 101 +- lib/Transforms/Utils/LoopSimplify.cpp | 24 +- lib/Transforms/Utils/LoopUnroll.cpp | 2 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 2 +- lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 20 +- lib/Transforms/Utils/LowerInvoke.cpp | 18 +- lib/Transforms/Utils/LowerSwitch.cpp | 12 +- lib/Transforms/Utils/Mem2Reg.cpp | 8 +- lib/Transforms/Utils/MetaRenamer.cpp | 35 +- lib/Transforms/Utils/ModuleUtils.cpp | 8 +- .../Utils/PromoteMemoryToRegister.cpp | 30 +- lib/Transforms/Utils/SSAUpdater.cpp | 8 +- lib/Transforms/Utils/SimplifyCFG.cpp | 440 +- lib/Transforms/Utils/SimplifyIndVar.cpp | 10 +- lib/Transforms/Utils/SimplifyInstructions.cpp | 10 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 982 +- .../Utils/UnifyFunctionExitNodes.cpp | 10 +- lib/Transforms/Utils/ValueMapper.cpp | 29 +- lib/Transforms/Vectorize/BBVectorize.cpp | 1106 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 3583 +- lib/Transforms/Vectorize/Vectorize.cpp | 10 +- lib/VMCore/Attributes.cpp | 547 - lib/VMCore/AttributesImpl.h | 71 - lib/VMCore/TargetTransformInfo.cpp | 31 - projects/CMakeLists.txt | 9 +- projects/sample/Makefile.llvm.config.in | 6 + projects/sample/Makefile.llvm.rules | 27 +- projects/sample/autoconf/config.sub | 4 +- projects/sample/autoconf/configure.ac | 30 +- projects/sample/configure | 61 +- projects/sample/tools/sample/main.c | 4 +- runtime/libprofile/CMakeLists.txt | 1 - runtime/libprofile/GCDAProfiling.c | 205 - runtime/libprofile/Makefile | 4 +- runtime/libprofile/libprofile.exports | 12 - test/Analysis/BasicAA/intrinsics.ll | 7 +- test/Analysis/BasicAA/invariant_load.ll | 29 + test/Analysis/BasicAA/phi-spec-order.ll | 71 + 
test/Analysis/BasicAA/phi-speculation.ll | 65 +- test/Analysis/BasicAA/pure-const-dce.ll | 9 +- test/Analysis/CostModel/ARM/cast.ll | 547 + test/Analysis/CostModel/ARM/gep.ll | 43 + test/Analysis/CostModel/ARM/insertelement.ll | 46 + .../CostModel/ARM}/lit.local.cfg | 2 +- test/Analysis/CostModel/ARM/select.ll | 67 + test/Analysis/CostModel/ARM/shuffle.ll | 40 + .../CostModel/PowerPC/insert_extract.ll | 16 + test/Analysis/CostModel/PowerPC/lit.local.cfg | 6 + test/Analysis/CostModel/PowerPC/load_store.ll | 34 + test/Analysis/CostModel/X86/arith.ll | 90 +- test/Analysis/CostModel/X86/cast.ll | 89 +- test/Analysis/CostModel/X86/cmp.ll | 44 +- test/Analysis/CostModel/X86/gep.ll | 40 + test/Analysis/CostModel/X86/i32.ll | 4 +- test/Analysis/CostModel/X86/intrinsic-cost.ll | 32 + test/Analysis/CostModel/X86/load_store.ll | 64 + test/Analysis/CostModel/X86/testshiftashr.ll | 531 + test/Analysis/CostModel/X86/testshiftlshr.ll | 530 + test/Analysis/CostModel/X86/testshiftshl.ll | 530 + test/Analysis/CostModel/X86/tiny.ll | 2 +- .../Analysis/CostModel/X86/vectorized-loop.ll | 11 +- test/Analysis/CostModel/no_info.ll | 5 +- test/Analysis/DependenceAnalysis/Banerjee.ll | 152 +- test/Analysis/DependenceAnalysis/Coupled.ll | 329 +- test/Analysis/DependenceAnalysis/ExactRDIV.ll | 458 +- test/Analysis/DependenceAnalysis/ExactSIV.ll | 252 +- test/Analysis/DependenceAnalysis/GCD.ll | 158 +- .../DependenceAnalysis/Preliminary.ll | 646 +- .../DependenceAnalysis/Propagating.ll | 302 +- .../DependenceAnalysis/Separability.ll | 188 +- test/Analysis/DependenceAnalysis/StrongSIV.ll | 334 +- .../DependenceAnalysis/SymbolicRDIV.ll | 357 +- .../DependenceAnalysis/SymbolicSIV.ll | 271 +- .../DependenceAnalysis/WeakCrossingSIV.ll | 186 +- .../DependenceAnalysis/WeakZeroDstSIV.ll | 168 +- .../DependenceAnalysis/WeakZeroSrcSIV.ll | 168 +- test/Analysis/DependenceAnalysis/ZIV.ll | 39 +- test/Analysis/Dominators/invoke.ll | 2 +- test/Analysis/Profiling/lit.local.cfg | 15 + 
.../RegionInfo/20100809_bb_not_in_domtree.ll | 2 +- test/Analysis/RegionInfo/block_sort.ll | 1 + test/Analysis/RegionInfo/cond_loop.ll | 1 + .../RegionInfo/condition_complicated.ll | 1 + .../RegionInfo/condition_complicated_2.ll | 1 + .../RegionInfo/condition_forward_edge.ll | 1 + .../RegionInfo/condition_same_exit.ll | 1 + test/Analysis/RegionInfo/condition_simple.ll | 1 + test/Analysis/RegionInfo/exit_in_condition.ll | 1 + test/Analysis/RegionInfo/infinite_loop.ll | 1 + test/Analysis/RegionInfo/infinite_loop_2.ll | 1 + test/Analysis/RegionInfo/infinite_loop_3.ll | 1 + test/Analysis/RegionInfo/infinite_loop_4.ll | 1 + .../RegionInfo/loop_with_condition.ll | 1 + test/Analysis/RegionInfo/loops_1.ll | 1 + test/Analysis/RegionInfo/loops_2.ll | 1 + test/Analysis/RegionInfo/mix_1.ll | 1 + test/Analysis/RegionInfo/nested_loops.ll | 1 + test/Analysis/RegionInfo/next.ll | 1 + test/Analysis/RegionInfo/paper.ll | 1 + .../RegionInfo/two_loops_same_header.ll | 1 + .../2010-09-03-RequiredTransitive.ll | 4 +- .../2011-03-09-ExactNoMaxBECount.ll | 2 +- test/Analysis/ScalarEvolution/fold.ll | 2 +- test/Analysis/ScalarEvolution/scev-invalid.ll | 34 + .../TypeBasedAliasAnalysis/functionattrs.ll | 20 +- .../TypeBasedAliasAnalysis/intrinsics.ll | 5 +- .../TypeBasedAliasAnalysis/placement-tbaa.ll | 104 + test/Assembler/2008-09-02-FunctionNotes.ll | 9 +- ...-02-05-FunctionLocalMetadataBecomesNull.ll | 9 +- test/Assembler/ConstantExprNoFold.ll | 23 + test/Assembler/externally-initialized.ll | 5 + test/Assembler/fast-math-flags.ll | 142 + test/Assembler/getelementptr.ll | 19 +- test/Assembler/getelementptr_vec_idx1.ll | 10 + test/Assembler/getelementptr_vec_idx2.ll | 10 + test/Assembler/getelementptr_vec_idx3.ll | 10 + test/Assembler/getelementptr_vec_struct.ll | 10 + test/Assembler/unnamed-addr.ll | 4 +- test/Bindings/Ocaml/vmcore.ml | 3 +- test/Bitcode/attributes.ll | 76 +- test/Bitcode/ptest-new.ll | 9 +- test/Bitcode/ptest-old.ll | 9 +- test/CMakeLists.txt | 22 +- 
test/CodeGen/AArch64/adc.ll | 54 + test/CodeGen/AArch64/addsub-shifted.ll | 295 + test/CodeGen/AArch64/addsub.ll | 127 + test/CodeGen/AArch64/addsub_ext.ll | 189 + test/CodeGen/AArch64/adrp-relocation.ll | 35 + test/CodeGen/AArch64/alloca.ll | 134 + test/CodeGen/AArch64/analyze-branch.ll | 231 + .../AArch64/atomic-ops-not-barriers.ll | 24 + test/CodeGen/AArch64/atomic-ops.ll | 1055 + test/CodeGen/AArch64/basic-pic.ll | 70 + test/CodeGen/AArch64/bitfield-insert-0.ll | 19 + test/CodeGen/AArch64/bitfield-insert.ll | 193 + test/CodeGen/AArch64/bitfield.ll | 218 + test/CodeGen/AArch64/blockaddress.ll | 18 + test/CodeGen/AArch64/bool-loads.ll | 55 + test/CodeGen/AArch64/breg.ll | 17 + test/CodeGen/AArch64/callee-save.ll | 86 + test/CodeGen/AArch64/compare-branch.ll | 38 + test/CodeGen/AArch64/cond-sel.ll | 213 + test/CodeGen/AArch64/directcond.ll | 84 + test/CodeGen/AArch64/dp-3source.ll | 163 + test/CodeGen/AArch64/dp1.ll | 152 + test/CodeGen/AArch64/dp2.ll | 169 + test/CodeGen/AArch64/elf-extern.ll | 21 + test/CodeGen/AArch64/extern-weak.ll | 35 + test/CodeGen/AArch64/extract.ll | 57 + test/CodeGen/AArch64/fastcc-reserved.ll | 58 + test/CodeGen/AArch64/fastcc.ll | 123 + test/CodeGen/AArch64/fcmp.ll | 81 + test/CodeGen/AArch64/fcvt-fixed.ll | 191 + test/CodeGen/AArch64/fcvt-int.ll | 151 + test/CodeGen/AArch64/flags-multiuse.ll | 35 + test/CodeGen/AArch64/floatdp_1source.ll | 138 + test/CodeGen/AArch64/floatdp_2source.ll | 60 + test/CodeGen/AArch64/fp-cond-sel.ll | 26 + test/CodeGen/AArch64/fp-dp3.ll | 102 + test/CodeGen/AArch64/fp128-folding.ll | 17 + test/CodeGen/AArch64/fp128.ll | 280 + test/CodeGen/AArch64/fpimm.ll | 34 + test/CodeGen/AArch64/func-argpassing.ll | 193 + test/CodeGen/AArch64/func-calls.ll | 140 + test/CodeGen/AArch64/global-alignment.ll | 69 + test/CodeGen/AArch64/got-abuse.ll | 23 + test/CodeGen/AArch64/i128-align.ll | 29 + test/CodeGen/AArch64/illegal-float-ops.ll | 221 + test/CodeGen/AArch64/init-array.ll | 9 + 
.../AArch64/inline-asm-constraints-badI.ll | 7 + .../AArch64/inline-asm-constraints-badK.ll | 7 + .../AArch64/inline-asm-constraints-badK2.ll | 7 + .../AArch64/inline-asm-constraints-badL.ll | 7 + .../CodeGen/AArch64/inline-asm-constraints.ll | 117 + test/CodeGen/AArch64/inline-asm-modifiers.ll | 125 + test/CodeGen/AArch64/jump-table.ll | 56 + test/CodeGen/AArch64/large-frame.ll | 114 + test/CodeGen/AArch64/ldst-regoffset.ll | 333 + test/CodeGen/AArch64/ldst-unscaledimm.ll | 218 + test/CodeGen/AArch64/ldst-unsignedimm.ll | 251 + test/CodeGen/AArch64/lit.local.cfg | 6 + test/CodeGen/AArch64/literal_pools.ll | 55 + test/CodeGen/AArch64/local_vars.ll | 57 + test/CodeGen/AArch64/logical-imm.ll | 84 + test/CodeGen/AArch64/logical_shifted_reg.ll | 224 + test/CodeGen/AArch64/logical_shifted_reg.s | 208 + test/CodeGen/AArch64/movw-consts.ll | 124 + test/CodeGen/AArch64/pic-eh-stubs.ll | 60 + .../AArch64/regress-bitcast-formals.ll | 11 + .../CodeGen/AArch64/regress-f128csel-flags.ll | 27 + test/CodeGen/AArch64/regress-tail-livereg.ll | 19 + test/CodeGen/AArch64/regress-tblgen-chains.ll | 36 + .../AArch64/regress-w29-reserved-with-fp.ll | 37 + .../AArch64/regress-wzr-allocatable.ll | 41 + test/CodeGen/AArch64/setcc-takes-i32.ll | 22 + test/CodeGen/AArch64/sibling-call.ll | 97 + test/CodeGen/AArch64/sincos-expansion.ll | 35 + test/CodeGen/AArch64/tail-call.ll | 94 + test/CodeGen/AArch64/tls-dynamic-together.ll | 18 + test/CodeGen/AArch64/tls-dynamics.ll | 121 + test/CodeGen/AArch64/tls-execs.ll | 63 + test/CodeGen/AArch64/tst-br.ll | 48 + test/CodeGen/AArch64/variadic.ll | 144 + test/CodeGen/AArch64/zero-reg.ll | 31 + test/CodeGen/ARM/2007-03-13-InstrSched.ll | 1 + .../ARM/2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/ARM/2010-08-04-StackVariable.ll | 67 +- test/CodeGen/ARM/2010-11-30-reloc-movt.ll | 4 +- test/CodeGen/ARM/2010-12-15-elf-lcomm.ll | 3 +- .../CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll | 74 +- .../CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll | 75 +- 
.../ARM/2011-10-26-memset-with-neon.ll | 8 - .../ARM/2011-11-29-128bitArithmetics.ll | 4 +- test/CodeGen/ARM/2011-12-14-machine-sink.ll | 1 + .../CodeGen/ARM/2012-06-12-SchedMemLatency.ll | 2 + test/CodeGen/ARM/2012-08-09-neon-extload.ll | 12 +- test/CodeGen/ARM/2012-08-23-legalize-vmull.ll | 150 + .../ARM/2012-10-04-AAPCS-byval-align8.ll | 2 +- test/CodeGen/ARM/2012-11-14-subs_carry.ll | 31 + test/CodeGen/ARM/2013-01-21-PR14992.ll | 28 + test/CodeGen/ARM/2013-02-27-expand-vfma.ll | 31 + .../2013-04-05-overridden-loads-PR14824.ll | 110 + test/CodeGen/ARM/DbgValueOtherTargets.test | 1 + test/CodeGen/ARM/MergeConsecutiveStores.ll | 98 + test/CodeGen/ARM/PR15053.ll | 13 + test/CodeGen/ARM/a15-SD-dep.ll | 58 + test/CodeGen/ARM/a15-partial-update.ll | 38 + test/CodeGen/ARM/addrmode.ll | 1 + test/CodeGen/ARM/alloc-no-stack-realign.ll | 48 + test/CodeGen/ARM/arm-modifier.ll | 3 +- test/CodeGen/ARM/arm-ttype-target2.ll | 44 + test/CodeGen/ARM/atomic-64bit.ll | 290 +- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 65 + test/CodeGen/ARM/bfx.ll | 25 + test/CodeGen/ARM/call-tc.ll | 1 - test/CodeGen/ARM/call_nolink.ll | 14 +- test/CodeGen/ARM/coalesce-subregs.ll | 2 +- test/CodeGen/ARM/commute-movcc.ll | 4 +- test/CodeGen/ARM/crash.ll | 23 + test/CodeGen/ARM/debug-info-arg.ll | 43 +- test/CodeGen/ARM/debug-info-blocks.ll | 253 +- test/CodeGen/ARM/debug-info-branch-folding.ll | 93 +- test/CodeGen/ARM/debug-info-d16-reg.ll | 62 +- test/CodeGen/ARM/debug-info-qreg.ll | 91 +- test/CodeGen/ARM/debug-info-s16-reg.ll | 60 +- test/CodeGen/ARM/debug-info-sreg2.ll | 25 +- test/CodeGen/ARM/domain-conv-vmovs.ll | 22 +- test/CodeGen/ARM/eh-dispcont.ll | 89 + test/CodeGen/ARM/ehabi-filters.ll | 77 + test/CodeGen/ARM/ehabi-mc-cantunwind.ll | 14 + test/CodeGen/ARM/ehabi-mc-section-group.ll | 79 + test/CodeGen/ARM/ehabi-mc-section.ll | 59 + test/CodeGen/ARM/ehabi-mc-sh_link.ll | 47 + test/CodeGen/ARM/ehabi-mc.ll | 59 + test/CodeGen/ARM/ehabi-no-landingpad.ll | 18 + test/CodeGen/ARM/elf-lcomm-align.ll | 5 
+- test/CodeGen/ARM/extload-knownzero.ll | 26 + test/CodeGen/ARM/fabs-neon.ll | 17 + test/CodeGen/ARM/fabss.ll | 6 +- test/CodeGen/ARM/fadds.ll | 10 +- test/CodeGen/ARM/fast-isel-GEP-coalesce.ll | 4 +- test/CodeGen/ARM/fast-isel-br-const.ll | 4 +- .../ARM/fast-isel-call-multi-reg-return.ll | 4 +- test/CodeGen/ARM/fast-isel-crash.ll | 2 +- test/CodeGen/ARM/fast-isel-crash2.ll | 2 +- test/CodeGen/ARM/fast-isel-deadcode.ll | 2 +- test/CodeGen/ARM/fast-isel-fold.ll | 4 +- test/CodeGen/ARM/fast-isel-frameaddr.ll | 8 +- test/CodeGen/ARM/fast-isel-icmp.ll | 30 + test/CodeGen/ARM/fast-isel-indirectbr.ll | 4 +- test/CodeGen/ARM/fast-isel-intrinsic.ll | 116 +- test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll | 2 +- test/CodeGen/ARM/fast-isel-pred.ll | 2 +- test/CodeGen/ARM/fast-isel-redefinition.ll | 2 +- test/CodeGen/ARM/fast-isel-static.ll | 4 +- test/CodeGen/ARM/fdivs.ll | 8 +- test/CodeGen/ARM/fmuls.ll | 10 +- test/CodeGen/ARM/fnegs.ll | 14 +- test/CodeGen/ARM/fnmscs.ll | 34 +- test/CodeGen/ARM/fp_convert.ll | 4 +- test/CodeGen/ARM/fsubs.ll | 7 +- test/CodeGen/ARM/global-merge-addrspace.ll | 12 + test/CodeGen/ARM/global-merge.ll | 53 +- test/CodeGen/ARM/indirect-reg-input.ll | 14 + test/CodeGen/ARM/inlineasm-64bit.ll | 54 + test/CodeGen/ARM/inlineasm3.ll | 2 +- test/CodeGen/ARM/invoke-donothing-assert.ll | 73 + test/CodeGen/ARM/lit.local.cfg | 2 +- test/CodeGen/ARM/lsr-icmp-imm.ll | 4 +- test/CodeGen/ARM/machine-cse-cmp.ll | 32 + test/CodeGen/ARM/memcpy-inline.ll | 109 +- test/CodeGen/ARM/memset-inline.ll | 30 + test/CodeGen/ARM/neon-spfp.ll | 76 + test/CodeGen/ARM/neon_cmp.ll | 15 + test/CodeGen/ARM/neon_fpconv.ll | 42 + test/CodeGen/ARM/neon_ld2.ll | 10 +- test/CodeGen/ARM/neon_minmax.ll | 2 +- test/CodeGen/ARM/popcnt.ll | 191 + test/CodeGen/ARM/reg_asc_order.ll | 16 - test/CodeGen/ARM/reg_sequence.ll | 4 +- test/CodeGen/ARM/ret_sret_vector.ll | 12 + .../CodeGen/ARM/sjlj-prepare-critical-edge.ll | 67 + test/CodeGen/ARM/spill-q.ll | 4 +- test/CodeGen/ARM/subreg-remat.ll | 6 
+- test/CodeGen/ARM/trap.ll | 28 + test/CodeGen/ARM/vcvt.ll | 172 + test/CodeGen/ARM/vector-DAGCombine.ll | 27 + test/CodeGen/ARM/vfloatintrinsics.ll | 377 + test/CodeGen/ARM/vld1.ll | 8 +- test/CodeGen/ARM/vld2.ll | 14 +- test/CodeGen/ARM/vld3.ll | 8 +- test/CodeGen/ARM/vld4.ll | 18 +- test/CodeGen/ARM/vlddup.ll | 12 +- test/CodeGen/ARM/vldlane.ll | 28 +- test/CodeGen/ARM/vselect_imax.ll | 111 + test/CodeGen/ARM/vst1.ll | 8 +- test/CodeGen/ARM/vst2.ll | 14 +- test/CodeGen/ARM/vst3.ll | 8 +- test/CodeGen/ARM/vst4.ll | 14 +- test/CodeGen/ARM/vstlane.ll | 26 +- test/CodeGen/ARM/widen-vmovs.ll | 2 +- test/CodeGen/ARM/zextload_demandedbits.ll | 35 + test/CodeGen/CPP/2007-06-16-Funcname.ll | 1 - test/CodeGen/CellSPU/2009-01-01-BrCond.ll | 31 - .../2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/CellSPU/and_ops.ll | 282 - test/CodeGen/CellSPU/arg_ret.ll | 34 - test/CodeGen/CellSPU/bigstack.ll | 17 - test/CodeGen/CellSPU/bss.ll | 11 - test/CodeGen/CellSPU/call.ll | 49 - test/CodeGen/CellSPU/crash.ll | 8 - test/CodeGen/CellSPU/ctpop.ll | 30 - test/CodeGen/CellSPU/div_ops.ll | 22 - test/CodeGen/CellSPU/dp_farith.ll | 102 - test/CodeGen/CellSPU/eqv.ll | 152 - test/CodeGen/CellSPU/extract_elt.ll | 277 - test/CodeGen/CellSPU/fcmp32.ll | 36 - test/CodeGen/CellSPU/fcmp64.ll | 7 - test/CodeGen/CellSPU/fdiv.ll | 22 - test/CodeGen/CellSPU/fneg-fabs.ll | 42 - test/CodeGen/CellSPU/i64ops.ll | 57 - test/CodeGen/CellSPU/i8ops.ll | 25 - test/CodeGen/CellSPU/icmp16.ll | 574 - test/CodeGen/CellSPU/icmp32.ll | 575 - test/CodeGen/CellSPU/icmp64.ll | 146 - test/CodeGen/CellSPU/icmp8.ll | 446 - test/CodeGen/CellSPU/immed16.ll | 40 - test/CodeGen/CellSPU/immed32.ll | 83 - test/CodeGen/CellSPU/immed64.ll | 95 - test/CodeGen/CellSPU/int2fp.ll | 41 - test/CodeGen/CellSPU/intrinsics_branch.ll | 150 - test/CodeGen/CellSPU/intrinsics_float.ll | 94 - test/CodeGen/CellSPU/intrinsics_logical.ll | 49 - test/CodeGen/CellSPU/jumptable.ll | 21 - test/CodeGen/CellSPU/loads.ll | 59 - 
test/CodeGen/CellSPU/mul-with-overflow.ll | 15 - test/CodeGen/CellSPU/mul_ops.ll | 88 - test/CodeGen/CellSPU/nand.ll | 125 - test/CodeGen/CellSPU/or_ops.ll | 278 - test/CodeGen/CellSPU/private.ll | 19 - test/CodeGen/CellSPU/rotate_ops.ll | 172 - test/CodeGen/CellSPU/select_bits.ll | 572 - test/CodeGen/CellSPU/sext128.ll | 71 - test/CodeGen/CellSPU/shift_ops.ll | 348 - test/CodeGen/CellSPU/shuffles.ll | 69 - test/CodeGen/CellSPU/sp_farith.ll | 90 - test/CodeGen/CellSPU/stores.ll | 181 - test/CodeGen/CellSPU/storestruct.ll | 13 - test/CodeGen/CellSPU/struct_1.ll | 147 - test/CodeGen/CellSPU/sub_ops.ll | 26 - test/CodeGen/CellSPU/trunc.ll | 94 - .../CellSPU/useful-harnesses/README.txt | 5 - .../CellSPU/useful-harnesses/i32operations.c | 69 - .../CellSPU/useful-harnesses/i64operations.c | 673 - .../CellSPU/useful-harnesses/i64operations.h | 43 - .../CellSPU/useful-harnesses/vecoperations.c | 179 - test/CodeGen/CellSPU/v2f32.ll | 78 - test/CodeGen/CellSPU/v2i32.ll | 61 - test/CodeGen/CellSPU/vec_const.ll | 154 - test/CodeGen/CellSPU/vecinsert.ll | 131 - .../CodeGen/Generic/2008-02-20-MatchingMem.ll | 1 + .../Generic/2013-03-20-APFloatCrash.ll | 7 + test/CodeGen/Generic/dag-combine-crash.ll | 21 + .../CodeGen/Generic/inline-asm-mem-clobber.ll | 21 + ...e-return-values-cross-block-with-invoke.ll | 2 +- test/CodeGen/Generic/select-cc.ll | 2 +- test/CodeGen/Generic/vector.ll | 7 +- test/CodeGen/Hexagon/absaddr-store.ll | 46 + test/CodeGen/Hexagon/adde.ll | 34 + test/CodeGen/Hexagon/args.ll | 7 +- test/CodeGen/Hexagon/ashift-left-right.ll | 21 + test/CodeGen/Hexagon/block-addr.ll | 64 + test/CodeGen/Hexagon/cext-check.ll | 57 + test/CodeGen/Hexagon/cext-valid-packet1.ll | 18 + test/CodeGen/Hexagon/cext-valid-packet2.ll | 43 + test/CodeGen/Hexagon/cmp-to-genreg.ll | 34 + test/CodeGen/Hexagon/cmp-to-predreg.ll | 43 + test/CodeGen/Hexagon/cmp_pred.ll | 115 + test/CodeGen/Hexagon/cmp_pred_reg.ll | 115 + test/CodeGen/Hexagon/cmpb_pred.ll | 92 + test/CodeGen/Hexagon/combine_ir.ll 
| 55 + test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll | 34 + test/CodeGen/Hexagon/dualstore.ll | 6 +- test/CodeGen/Hexagon/gp-plus-offset-load.ll | 51 + test/CodeGen/Hexagon/gp-plus-offset-store.ll | 35 + test/CodeGen/Hexagon/gp-rel.ll | 33 + test/CodeGen/Hexagon/hwloop-cleanup.ll | 86 + test/CodeGen/Hexagon/hwloop-const.ll | 31 + test/CodeGen/Hexagon/hwloop-dbg.ll | 64 + test/CodeGen/Hexagon/hwloop-le.ll | 438 + test/CodeGen/Hexagon/hwloop-lt.ll | 438 + test/CodeGen/Hexagon/hwloop-lt1.ll | 32 + test/CodeGen/Hexagon/hwloop-ne.ll | 438 + test/CodeGen/Hexagon/i16_VarArg.ll | 40 + test/CodeGen/Hexagon/i1_VarArg.ll | 44 + test/CodeGen/Hexagon/i8_VarArg.ll | 40 + .../Hexagon/idxload-with-zero-offset.ll | 70 + test/CodeGen/Hexagon/indirect-br.ll | 14 + test/CodeGen/Hexagon/memops.ll | 1369 + test/CodeGen/Hexagon/memops1.ll | 33 + test/CodeGen/Hexagon/memops2.ll | 32 + test/CodeGen/Hexagon/memops3.ll | 31 + test/CodeGen/Hexagon/misaligned-access.ll | 16 + test/CodeGen/Hexagon/postinc-load.ll | 29 + test/CodeGen/Hexagon/postinc-store.ll | 29 + test/CodeGen/Hexagon/pred-absolute-store.ll | 19 + test/CodeGen/Hexagon/predicate-copy.ll | 8 + test/CodeGen/Hexagon/struct_args.ll | 2 +- test/CodeGen/Hexagon/sube.ll | 29 + test/CodeGen/Hexagon/validate-offset.ll | 36 + test/CodeGen/Hexagon/zextloadi1.ll | 25 + test/CodeGen/Inputs/DbgValueOtherTargets.ll | 28 + .../MBlaze/2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/MBlaze/DbgValueOtherTargets.test | 1 + test/CodeGen/MBlaze/lit.local.cfg | 2 +- .../MSP430/2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/MSP430/DbgValueOtherTargets.test | 1 + test/CodeGen/MSP430/byval.ll | 26 + test/CodeGen/MSP430/lit.local.cfg | 2 +- test/CodeGen/MSP430/vararg.ll | 50 + .../Mips/2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/Mips/2010-07-20-Switch.ll | 35 +- test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll | 11 + test/CodeGen/Mips/DbgValueOtherTargets.test | 1 + test/CodeGen/Mips/addi.ll | 30 + test/CodeGen/Mips/addressing-mode.ll | 41 
+ test/CodeGen/Mips/align16.ll | 31 + test/CodeGen/Mips/alloca.ll | 10 +- test/CodeGen/Mips/alloca16.ll | 4 +- test/CodeGen/Mips/br-jmp.ll | 5 + test/CodeGen/Mips/brdelayslot.ll | 141 + test/CodeGen/Mips/cmov.ll | 137 + test/CodeGen/Mips/dsp-patterns.ll | 31 + test/CodeGen/Mips/eh-return32.ll | 85 + test/CodeGen/Mips/eh-return64.ll | 87 + test/CodeGen/Mips/ex2.ll | 29 + test/CodeGen/Mips/fp16static.ll | 13 + test/CodeGen/Mips/frame-address.ll | 4 +- test/CodeGen/Mips/gpreg-lazy-binding.ll | 27 + test/CodeGen/Mips/helloworld.ll | 31 +- test/CodeGen/Mips/hf16_1.ll | 256 + test/CodeGen/Mips/i64arg.ll | 14 +- test/CodeGen/Mips/inlineasm_constraint.ll | 9 + test/CodeGen/Mips/jtstat.ll | 71 + test/CodeGen/Mips/largefr1.ll | 61 + test/CodeGen/Mips/lit.local.cfg | 2 +- test/CodeGen/Mips/madd-msub.ll | 22 +- test/CodeGen/Mips/mips16ex.ll | 87 + test/CodeGen/Mips/mips16fpe.ll | 381 + test/CodeGen/Mips/mips64-f128-call.ll | 45 + test/CodeGen/Mips/mips64-f128.ll | 646 + test/CodeGen/Mips/mips64-libcall.ll | 29 + test/CodeGen/Mips/mips64-sret.ll | 2 +- test/CodeGen/Mips/o32_cc_byval.ll | 10 +- test/CodeGen/Mips/return-vector-float4.ll | 12 - test/CodeGen/Mips/return_address.ll | 6 +- test/CodeGen/Mips/selTBteqzCmpi.ll | 26 + test/CodeGen/Mips/selTBtnezCmpi.ll | 26 + test/CodeGen/Mips/selTBtnezSlti.ll | 25 + test/CodeGen/Mips/seleq.ll | 95 + test/CodeGen/Mips/seleqk.ll | 91 + test/CodeGen/Mips/selgek.ll | 94 + test/CodeGen/Mips/selgt.ll | 98 + test/CodeGen/Mips/selle.ll | 96 + test/CodeGen/Mips/selltk.ll | 93 + test/CodeGen/Mips/selne.ll | 97 + test/CodeGen/Mips/selnek.ll | 107 + test/CodeGen/Mips/selpat.ll | 32 +- test/CodeGen/Mips/seteq.ll | 2 +- test/CodeGen/Mips/seteqz.ll | 4 +- test/CodeGen/Mips/setge.ll | 2 +- test/CodeGen/Mips/setgek.ll | 2 +- test/CodeGen/Mips/setle.ll | 2 +- test/CodeGen/Mips/setlt.ll | 2 +- test/CodeGen/Mips/setltk.ll | 2 +- test/CodeGen/Mips/setne.ll | 2 +- test/CodeGen/Mips/setuge.ll | 2 +- test/CodeGen/Mips/setugt.ll | 2 +- 
test/CodeGen/Mips/setule.ll | 2 +- test/CodeGen/Mips/setult.ll | 2 +- test/CodeGen/Mips/setultk.ll | 4 +- test/CodeGen/Mips/tls.ll | 2 +- test/CodeGen/Mips/vector-setcc.ll | 16 + test/CodeGen/NVPTX/annotations.ll | 2 - test/CodeGen/NVPTX/arithmetic-fp-sm10.ll | 72 - test/CodeGen/NVPTX/arithmetic-int.ll | 2 - test/CodeGen/NVPTX/calling-conv.ll | 2 - test/CodeGen/NVPTX/compare-int.ll | 2 - test/CodeGen/NVPTX/convert-fp.ll | 2 - test/CodeGen/NVPTX/convert-int-sm10.ll | 55 - test/CodeGen/NVPTX/intrin-nocapture.ll | 21 + test/CodeGen/NVPTX/intrinsic-old.ll | 2 - test/CodeGen/NVPTX/intrinsics.ll | 2 - test/CodeGen/NVPTX/ld-addrspace.ll | 2 - test/CodeGen/NVPTX/nvvm-reflect.ll | 34 + test/CodeGen/NVPTX/sched1.ll | 31 + test/CodeGen/NVPTX/sched2.ll | 32 + test/CodeGen/NVPTX/sm-version-10.ll | 6 - test/CodeGen/NVPTX/sm-version-11.ll | 6 - test/CodeGen/NVPTX/sm-version-12.ll | 6 - test/CodeGen/NVPTX/sm-version-13.ll | 6 - test/CodeGen/NVPTX/st-addrspace.ll | 2 - test/CodeGen/NVPTX/tuple-literal.ll | 5 + test/CodeGen/NVPTX/vector-args.ll | 27 + test/CodeGen/NVPTX/vector-compare.ll | 19 + test/CodeGen/NVPTX/vector-loads.ll | 66 + test/CodeGen/NVPTX/vector-select.ll | 16 + test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll | 2 +- .../PowerPC/2008-07-10-SplatMiscompile.ll | 1 + ...009-08-17-inline-asm-addr-mode-breakage.ll | 4 +- test/CodeGen/PowerPC/2010-02-12-saveCR.ll | 39 +- .../2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/PowerPC/2010-05-03-retaddr1.ll | 4 +- .../PowerPC/2010-12-18-PPCStackRefs.ll | 2 +- .../PowerPC/2012-09-16-TOC-entry-check.ll | 2 +- .../CodeGen/PowerPC/2012-11-16-mischedcall.ll | 33 + .../CodeGen/PowerPC/DbgValueOtherTargets.test | 1 + test/CodeGen/PowerPC/LargeAbsoluteAddr.ll | 6 +- test/CodeGen/PowerPC/a2q-stackalign.ll | 23 + test/CodeGen/PowerPC/a2q.ll | 10 + test/CodeGen/PowerPC/allocate-r0.ll | 18 + test/CodeGen/PowerPC/anon_aggr.ll | 99 + test/CodeGen/PowerPC/asym-regclass-copy.ll | 56 + test/CodeGen/PowerPC/atomic-1.ll | 6 +- 
test/CodeGen/PowerPC/atomic-2.ll | 20 + test/CodeGen/PowerPC/available-externally.ll | 54 +- test/CodeGen/PowerPC/bswap-load-store.ll | 35 +- test/CodeGen/PowerPC/buildvec_canonicalize.ll | 16 +- test/CodeGen/PowerPC/complex-return.ll | 55 + test/CodeGen/PowerPC/cr-spills.ll | 409 + test/CodeGen/PowerPC/ctr-cleanup.ll | 25 + test/CodeGen/PowerPC/cttz.ll | 4 +- test/CodeGen/PowerPC/dbg.ll | 10 +- test/CodeGen/PowerPC/dcbt-sched.ll | 22 + test/CodeGen/PowerPC/float-asmprint.ll | 34 + test/CodeGen/PowerPC/float-to-int.ll | 93 + test/CodeGen/PowerPC/frame-size.ll | 32 + test/CodeGen/PowerPC/frameaddr.ll | 47 + test/CodeGen/PowerPC/i32-to-float.ll | 82 + test/CodeGen/PowerPC/i64-to-float.ll | 52 + test/CodeGen/PowerPC/i64_fp_round.ll | 14 +- test/CodeGen/PowerPC/iabs.ll | 1 + test/CodeGen/PowerPC/in-asm-f64-reg.ll | 11 + test/CodeGen/PowerPC/jaggedstructs.ll | 10 +- test/CodeGen/PowerPC/lbzux.ll | 2 +- test/CodeGen/PowerPC/lit.local.cfg | 2 +- test/CodeGen/PowerPC/load-shift-combine.ll | 34 + test/CodeGen/PowerPC/mcm-1.ll | 27 + test/CodeGen/PowerPC/mcm-10.ll | 25 + test/CodeGen/PowerPC/mcm-11.ll | 27 + test/CodeGen/PowerPC/mcm-12.ll | 18 + test/CodeGen/PowerPC/mcm-2.ll | 37 + test/CodeGen/PowerPC/mcm-3.ll | 41 + test/CodeGen/PowerPC/mcm-4.ll | 27 + test/CodeGen/PowerPC/mcm-5.ll | 60 + test/CodeGen/PowerPC/mcm-6.ll | 28 + test/CodeGen/PowerPC/mcm-7.ll | 26 + test/CodeGen/PowerPC/mcm-8.ll | 25 + test/CodeGen/PowerPC/mcm-9.ll | 28 + test/CodeGen/PowerPC/mcm-default.ll | 26 + test/CodeGen/PowerPC/mcm-obj-2.ll | 77 + test/CodeGen/PowerPC/mcm-obj.ll | 268 + test/CodeGen/PowerPC/mem_update.ll | 2 +- .../PowerPC/misched-inorder-latency.ll | 55 + test/CodeGen/PowerPC/negctr.ll | 83 + test/CodeGen/PowerPC/popcnt.ll | 40 + test/CodeGen/PowerPC/ppc64-toc.ll | 2 +- test/CodeGen/PowerPC/pr15031.ll | 370 + test/CodeGen/PowerPC/pr15359.ll | 20 + test/CodeGen/PowerPC/pr15630.ll | 16 + test/CodeGen/PowerPC/pr15632.ll | 15 + test/CodeGen/PowerPC/pwr3-6x.ll | 14 + 
test/CodeGen/PowerPC/quadint-return.ll | 19 + test/CodeGen/PowerPC/r31.ll | 15 + test/CodeGen/PowerPC/recipest.ll | 226 + test/CodeGen/PowerPC/rlwimi3.ll | 1 + test/CodeGen/PowerPC/rounding-ops.ll | 145 + test/CodeGen/PowerPC/s000-alias-misched.ll | 101 + test/CodeGen/PowerPC/sdag-ppcf128.ll | 15 + test/CodeGen/PowerPC/sjlj.ll | 112 + test/CodeGen/PowerPC/stdux-constuse.ll | 47 + test/CodeGen/PowerPC/stfiwx-2.ll | 9 +- test/CodeGen/PowerPC/store-update.ll | 170 + test/CodeGen/PowerPC/structsinmem.ll | 22 +- test/CodeGen/PowerPC/structsinregs.ll | 42 +- test/CodeGen/PowerPC/stubs.ll | 4 +- test/CodeGen/PowerPC/stwu8.ll | 2 +- test/CodeGen/PowerPC/svr4-redzone.ll | 39 + test/CodeGen/PowerPC/tls-2.ll | 15 + test/CodeGen/PowerPC/tls-gd-obj.ll | 41 + test/CodeGen/PowerPC/tls-gd.ll | 23 + test/CodeGen/PowerPC/tls-ie-obj.ll | 36 + test/CodeGen/PowerPC/tls-ie.ll | 22 + test/CodeGen/PowerPC/tls-ld-2.ll | 24 + test/CodeGen/PowerPC/tls-ld-obj.ll | 50 + test/CodeGen/PowerPC/tls-ld.ll | 24 + test/CodeGen/PowerPC/tls.ll | 17 +- test/CodeGen/PowerPC/unal4-std.ll | 27 + test/CodeGen/PowerPC/unaligned.ll | 73 + test/CodeGen/PowerPC/vaddsplat.ll | 149 + test/CodeGen/PowerPC/varargs.ll | 21 +- test/CodeGen/PowerPC/vec_cmp.ll | 2 +- test/CodeGen/PowerPC/vec_constants.ll | 44 +- test/CodeGen/PowerPC/vec_extload.ll | 98 +- test/CodeGen/PowerPC/vec_mul.ll | 27 +- test/CodeGen/PowerPC/vec_rounding.ll | 172 + test/CodeGen/PowerPC/vec_select.ll | 7 + test/CodeGen/PowerPC/vrsave-spill.ll | 19 + test/CodeGen/PowerPC/vrspill.ll | 4 +- test/CodeGen/R600/128bit-kernel-args.ll | 18 + test/CodeGen/R600/add.v4i32.ll | 15 + test/CodeGen/R600/alu-split.ll | 850 + test/CodeGen/R600/and.v4i32.ll | 15 + .../dagcombiner-bug-illegal-vec4-int-to-fp.ll | 36 + .../R600/disconnected-predset-break-bug.ll | 29 + test/CodeGen/R600/fabs.ll | 16 + test/CodeGen/R600/fadd.ll | 16 + test/CodeGen/R600/fadd.v4f32.ll | 15 + test/CodeGen/R600/fcmp-cnd.ll | 14 + test/CodeGen/R600/fcmp-cnde-int-args.ll | 16 + 
test/CodeGen/R600/fcmp.ll | 37 + test/CodeGen/R600/fdiv.v4f32.ll | 19 + test/CodeGen/R600/floor.ll | 16 + test/CodeGen/R600/fmad.ll | 19 + test/CodeGen/R600/fmax.ll | 16 + test/CodeGen/R600/fmin.ll | 16 + test/CodeGen/R600/fmul.ll | 16 + test/CodeGen/R600/fmul.v4f32.ll | 15 + test/CodeGen/R600/fsub.ll | 16 + test/CodeGen/R600/fsub.v4f32.ll | 15 + test/CodeGen/R600/i8_to_double_to_float.ll | 11 + .../R600/icmp-select-sete-reverse-args.ll | 18 + test/CodeGen/R600/jump_address.ll | 50 + test/CodeGen/R600/kcache-fold.ll | 100 + .../R600/legalizedag-bug-expand-setcc.ll | 26 + test/CodeGen/R600/lit.local.cfg | 13 + test/CodeGen/R600/literals.ll | 32 + test/CodeGen/R600/llvm.AMDGPU.mul.ll | 17 + test/CodeGen/R600/llvm.AMDGPU.tex.ll | 42 + test/CodeGen/R600/llvm.AMDGPU.trunc.ll | 16 + .../R600/llvm.SI.fs.interp.constant.ll | 21 + test/CodeGen/R600/llvm.SI.sample.ll | 106 + test/CodeGen/R600/llvm.cos.ll | 16 + test/CodeGen/R600/llvm.pow.ll | 19 + test/CodeGen/R600/llvm.sin.ll | 16 + .../R600/load.constant_addrspace.f32.ll | 9 + test/CodeGen/R600/load.i8.ll | 10 + test/CodeGen/R600/lshl.ll | 14 + test/CodeGen/R600/lshr.ll | 14 + test/CodeGen/R600/mulhu.ll | 16 + test/CodeGen/R600/predicates.ll | 104 + test/CodeGen/R600/reciprocal.ll | 16 + .../R600/schedule-fs-loop-nested-if.ll | 83 + test/CodeGen/R600/schedule-fs-loop-nested.ll | 88 + test/CodeGen/R600/schedule-fs-loop.ll | 55 + test/CodeGen/R600/schedule-if-2.ll | 94 + test/CodeGen/R600/schedule-if.ll | 46 + .../R600/schedule-vs-if-nested-loop.ll | 134 + test/CodeGen/R600/sdiv.ll | 21 + .../R600/selectcc-icmp-select-float.ll | 15 + test/CodeGen/R600/selectcc-opt.ll | 64 + test/CodeGen/R600/selectcc_cnde.ll | 11 + test/CodeGen/R600/selectcc_cnde_int.ll | 11 + test/CodeGen/R600/set-dx10.ll | 137 + test/CodeGen/R600/setcc.v4i32.ll | 12 + test/CodeGen/R600/seto.ll | 13 + test/CodeGen/R600/setuo.ll | 13 + test/CodeGen/R600/short-args.ll | 41 + test/CodeGen/R600/store.v4f32.ll | 9 + test/CodeGen/R600/store.v4i32.ll | 9 + 
test/CodeGen/R600/udiv.v4i32.ll | 15 + test/CodeGen/R600/unsupported-cc.ll | 83 + test/CodeGen/R600/urem.v4i32.ll | 15 + test/CodeGen/R600/vec4-expand.ll | 53 + test/CodeGen/SI/sanity.ll | 37 + .../SPARC/2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/SPARC/64bit.ll | 146 + test/CodeGen/SPARC/64cond.ll | 56 + test/CodeGen/SPARC/DbgValueOtherTargets.test | 1 + test/CodeGen/SPARC/ctpop.ll | 2 +- test/CodeGen/SPARC/lit.local.cfg | 2 +- .../Thumb/2010-04-07-DbgValueOtherTargets.ll | 28 - .../CodeGen/Thumb/2010-07-15-debugOrdering.ll | 10 +- test/CodeGen/Thumb/DbgValueOtherTargets.test | 1 + test/CodeGen/Thumb/iabs.ll | 26 +- test/CodeGen/Thumb/lit.local.cfg | 2 +- .../Thumb/stack-coloring-without-frame-ptr.ll | 29 + .../2013-02-19-tail-call-register-hint.ll | 53 + ...03-02-vduplane-nonconstant-source-index.ll | 14 + ...013-03-06-vector-sext-operand-scalarize.ll | 19 + test/CodeGen/Thumb2/aligned-spill.ll | 14 +- test/CodeGen/Thumb2/cortex-fp.ll | 2 +- test/CodeGen/Thumb2/crash.ll | 9 + test/CodeGen/Thumb2/thumb2-ldr_post.ll | 4 +- test/CodeGen/Thumb2/thumb2-mul.ll | 2 +- test/CodeGen/Thumb2/thumb2-shifter.ll | 82 +- test/CodeGen/Thumb2/thumb2-spill-q.ll | 4 +- .../X86/2003-08-03-CallArgLiveRanges.ll | 1 + test/CodeGen/X86/2006-03-02-InstrSchedBug.ll | 1 + .../X86/2006-05-01-SchedCausingSpills.ll | 1 + test/CodeGen/X86/2006-05-02-InstrSched1.ll | 1 + test/CodeGen/X86/2006-05-02-InstrSched2.ll | 1 + test/CodeGen/X86/2006-05-11-InstrSched.ll | 1 + test/CodeGen/X86/2008-02-18-TailMergingBug.ll | 1 + .../X86/2008-05-22-FoldUnalignedLoad.ll | 8 +- test/CodeGen/X86/2008-08-31-EH_RETURN64.ll | 42 +- test/CodeGen/X86/2008-10-27-CoalescerBug.ll | 1 + .../X86/2008-10-27-StackRealignment.ll | 22 - test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll | 2 +- test/CodeGen/X86/2009-02-25-CommuteBug.ll | 1 + test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 1 + test/CodeGen/X86/2009-03-23-MultiUseSched.ll | 1 + test/CodeGen/X86/2009-04-16-SpillerUnfold.ll | 1 + 
test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll | 7 +- test/CodeGen/X86/2010-01-18-DbgValue.ll | 30 +- test/CodeGen/X86/2010-01-19-OptExtBug.ll | 1 + .../X86/2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/X86/2010-05-25-DotDebugLoc.ll | 42 +- test/CodeGen/X86/2010-05-26-DotDebugLoc.ll | 61 +- test/CodeGen/X86/2010-05-28-Crash.ll | 26 +- .../CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 55 +- test/CodeGen/X86/2010-07-06-DbgCrash.ll | 2 +- test/CodeGen/X86/2010-08-04-StackVariable.ll | 65 +- test/CodeGen/X86/2010-08-10-DbgConstant.ll | 25 - test/CodeGen/X86/2010-09-16-EmptyFilename.ll | 25 +- test/CodeGen/X86/2010-11-02-DbgParameter.ll | 28 +- test/CodeGen/X86/2010-12-02-MC-Set.ll | 15 +- .../X86/2011-01-24-DbgValue-Before-Use.ll | 44 +- test/CodeGen/X86/2011-06-12-FastAllocSpill.ll | 1 + test/CodeGen/X86/2011-09-14-valcoalesce.ll | 2 +- test/CodeGen/X86/2011-10-19-LegelizeLoad.ll | 4 +- test/CodeGen/X86/2011-11-30-or.ll | 12 +- test/CodeGen/X86/2012-01-11-split-cv.ll | 2 +- test/CodeGen/X86/2012-03-26-PostRALICMBug.ll | 1 + .../2012-07-15-BuildVectorPromote.ll | 2 +- test/CodeGen/X86/2012-07-15-broadcastfold.ll | 2 +- .../X86/2012-11-28-merge-store-alias.ll | 52 + test/CodeGen/X86/2012-11-30-handlemove-dbg.ll | 51 + test/CodeGen/X86/2012-11-30-misched-dbg.ll | 136 + test/CodeGen/X86/2012-11-30-regpres-dbg.ll | 44 + .../X86/2012-12-06-python27-miscompile.ll | 23 + test/CodeGen/X86/2012-12-1-merge-multiple.ll | 31 + .../CodeGen/X86/2012-12-12-DAGCombineCrash.ll | 46 + test/CodeGen/X86/2012-12-14-v8fp80-crash.ll | 22 + .../CodeGen/X86/2012-12-19-NoImplicitFloat.ll | 17 + test/CodeGen/X86/2013-01-09-DAGCombineBug.ll | 74 + test/CodeGen/X86/2013-02-12-ShuffleToZext.ll | 14 + test/CodeGen/X86/2013-03-13-VEX-DestReg.ll | 31 + test/CodeGen/X86/Atomics-64.ll | 4 +- test/CodeGen/X86/DbgValueOtherTargets.test | 2 + test/CodeGen/X86/GC/erlang-gc.ll | 25 + test/CodeGen/X86/GC/ocaml-gc.ll | 31 + test/CodeGen/X86/MachineSink-DbgValue.ll | 27 +- 
test/CodeGen/X86/MachineSink-PHIUse.ll | 1 + test/CodeGen/X86/MergeConsecutiveStores.ll | 132 +- test/CodeGen/X86/WidenArith.ll | 23 + .../X86/atom-bypass-slow-division-64.ll | 46 + test/CodeGen/X86/atom-bypass-slow-division.ll | 50 +- .../atom-call-reg-indirect-foldedreload32.ll | 77 + .../atom-call-reg-indirect-foldedreload64.ll | 91 + test/CodeGen/X86/atom-call-reg-indirect.ll | 45 + test/CodeGen/X86/atom-pad-short-functions.ll | 103 + test/CodeGen/X86/atomic-dagsched.ll | 110 + test/CodeGen/X86/atomic-load-store-wide.ll | 2 +- test/CodeGen/X86/atomic-load-store.ll | 2 +- test/CodeGen/X86/atomic-minmax-i6432.ll | 45 +- test/CodeGen/X86/atomic-or.ll | 2 +- test/CodeGen/X86/atomic-pointer.ll | 2 +- test/CodeGen/X86/atomic16.ll | 4 +- test/CodeGen/X86/atomic32.ll | 30 +- test/CodeGen/X86/atomic64.ll | 2 +- test/CodeGen/X86/atomic6432.ll | 2 +- test/CodeGen/X86/atomic8.ll | 4 +- test/CodeGen/X86/atomic_add.ll | 2 +- test/CodeGen/X86/atomic_op.ll | 2 +- test/CodeGen/X86/avx-cvt.ll | 10 +- test/CodeGen/X86/avx-intel-ocl.ll | 134 +- test/CodeGen/X86/avx-intrinsics-x86.ll | 7 +- test/CodeGen/X86/avx-load-store.ll | 44 +- test/CodeGen/X86/avx-sext.ll | 181 +- test/CodeGen/X86/avx-shift.ll | 11 +- test/CodeGen/X86/avx-shuffle.ll | 63 +- test/CodeGen/X86/avx-splat.ll | 18 +- test/CodeGen/X86/avx-vextractf128.ll | 18 + test/CodeGen/X86/avx-vpermil.ll | 4 +- test/CodeGen/X86/avx-zext.ll | 3 +- test/CodeGen/X86/avx2-conversions.ll | 41 + test/CodeGen/X86/avx2-logic.ll | 5 +- test/CodeGen/X86/avx2-shuffle.ll | 57 +- test/CodeGen/X86/blend-msb.ll | 6 +- test/CodeGen/X86/bmi.ll | 76 + test/CodeGen/X86/bool-simplify.ll | 86 +- test/CodeGen/X86/bt.ll | 100 +- test/CodeGen/X86/byval2.ll | 4 +- test/CodeGen/X86/byval3.ll | 4 +- test/CodeGen/X86/byval4.ll | 4 +- test/CodeGen/X86/byval5.ll | 4 +- test/CodeGen/X86/cas.ll | 73 + test/CodeGen/X86/clobber-fi0.ll | 37 + test/CodeGen/X86/cmp.ll | 15 + test/CodeGen/X86/coalesce-implicitdef.ll | 130 + test/CodeGen/X86/coldcc64.ll | 24 + 
test/CodeGen/X86/complex-fca.ll | 3 + test/CodeGen/X86/constant-pool-remat-0.ll | 1 + .../X86/convert-2-addr-3-addr-inc64.ll | 1 + test/CodeGen/X86/crash.ll | 2 +- test/CodeGen/X86/cvtv2f32.ll | 4 + test/CodeGen/X86/dagcombine-cse.ll | 1 + test/CodeGen/X86/dagcombine_unsafe_math.ll | 56 + test/CodeGen/X86/dbg-at-specficiation.ll | 2 +- test/CodeGen/X86/dbg-byval-parameter.ll | 30 +- test/CodeGen/X86/dbg-const-int.ll | 19 +- test/CodeGen/X86/dbg-const.ll | 20 +- test/CodeGen/X86/dbg-declare-arg.ll | 58 +- test/CodeGen/X86/dbg-declare.ll | 12 +- test/CodeGen/X86/dbg-file-name.ll | 14 +- test/CodeGen/X86/dbg-i128-const.ll | 23 +- test/CodeGen/X86/dbg-large-unsigned-const.ll | 38 +- test/CodeGen/X86/dbg-merge-loc-entry.ll | 41 +- test/CodeGen/X86/dbg-prolog-end.ll | 22 +- test/CodeGen/X86/dbg-subrange.ll | 19 +- test/CodeGen/X86/dbg-value-dag-combine.ll | 27 +- .../X86/dbg-value-inlined-parameter.ll | 87 - test/CodeGen/X86/dbg-value-isel.ll | 29 +- test/CodeGen/X86/dbg-value-location.ll | 34 +- test/CodeGen/X86/dbg-value-range.ll | 28 +- test/CodeGen/X86/divide-by-constant.ll | 6 +- test/CodeGen/X86/dwarf-comp-dir.ll | 6 +- test/CodeGen/X86/dynamic-allocas-VLAs.ll | 12 +- test/CodeGen/X86/early-ifcvt-crash.ll | 2 + test/CodeGen/X86/early-ifcvt.ll | 31 + test/CodeGen/X86/fast-isel-args-fail.ll | 23 + test/CodeGen/X86/fast-isel-args.ll | 25 + test/CodeGen/X86/fast-isel-constant.ll | 24 + test/CodeGen/X86/fast-isel-expect.ll | 21 + test/CodeGen/X86/fast-isel-x86-64.ll | 4 +- test/CodeGen/X86/float-asmprint.ll | 40 + test/CodeGen/X86/fma4-intrinsics-x86_64.ll | 20 + test/CodeGen/X86/fold-call.ll | 25 +- test/CodeGen/X86/fold-load-vec.ll | 39 + test/CodeGen/X86/fold-pcmpeqd-2.ll | 10 +- test/CodeGen/X86/fold-vex.ll | 16 + test/CodeGen/X86/fp-fast.ll | 2 +- test/CodeGen/X86/fp-load-trunc.ll | 4 +- test/CodeGen/X86/handle-move.ll | 4 +- test/CodeGen/X86/hipe-cc.ll | 77 + test/CodeGen/X86/hipe-cc64.ll | 87 + test/CodeGen/X86/hipe-prologue.ll | 67 + 
test/CodeGen/X86/hoist-invariant-load.ll | 1 + test/CodeGen/X86/imul-lea-2.ll | 18 +- test/CodeGen/X86/imul-lea.ll | 10 +- test/CodeGen/X86/imul64-lea.ll | 25 + test/CodeGen/X86/insertelement-copytoregs.ll | 3 +- test/CodeGen/X86/lea-2.ll | 18 +- test/CodeGen/X86/lea-4.ll | 24 +- test/CodeGen/X86/legalize-shift-64.ll | 11 + test/CodeGen/X86/licm-nested.ll | 1 + test/CodeGen/X86/lit.local.cfg | 2 +- test/CodeGen/X86/memcpy-2.ll | 82 +- test/CodeGen/X86/memcpy.ll | 26 + .../X86/memset-sse-stack-realignment.ll | 77 + test/CodeGen/X86/memset.ll | 41 +- test/CodeGen/X86/memset64-on-x86-32.ll | 1 + test/CodeGen/X86/misched-crash.ll | 40 + test/CodeGen/X86/misched-ilp.ll | 4 +- test/CodeGen/X86/misched-matmul.ll | 228 + test/CodeGen/X86/misched-matrix.ll | 195 + test/CodeGen/X86/misched-new.ll | 56 + test/CodeGen/X86/movgs.ll | 6 +- test/CodeGen/X86/ms-inline-asm.ll | 49 +- test/CodeGen/X86/multiple-loop-post-inc.ll | 2 +- test/CodeGen/X86/no-cmov.ll | 11 + test/CodeGen/X86/phi-immediate-factoring.ll | 1 + test/CodeGen/X86/pmovsx-inreg.ll | 176 + test/CodeGen/X86/pointer-vector.ll | 2 +- test/CodeGen/X86/pr10475.ll | 30 + test/CodeGen/X86/pr10499.ll | 14 + test/CodeGen/X86/pr10523.ll | 18 + test/CodeGen/X86/pr10524.ll | 14 + test/CodeGen/X86/pr10525.ll | 13 + test/CodeGen/X86/pr10526.ll | 13 + test/CodeGen/X86/pr11998.ll | 18 + test/CodeGen/X86/pr14314.ll | 8 +- test/CodeGen/X86/pr14562.ll | 15 + test/CodeGen/X86/pr15267.ll | 66 + test/CodeGen/X86/pr15296.ll | 46 + test/CodeGen/X86/pr15309.ll | 15 + test/CodeGen/X86/pr3522.ll | 1 + test/CodeGen/X86/pre-ra-sched.ll | 56 + test/CodeGen/X86/prefetch.ll | 3 + test/CodeGen/X86/psubus.ll | 340 + test/CodeGen/X86/rdrand.ll | 2 +- test/CodeGen/X86/rdseed.ll | 48 + test/CodeGen/X86/regpressure.ll | 1 + test/CodeGen/X86/ret-mmx.ll | 2 +- test/CodeGen/X86/rip-rel-lea.ll | 16 + test/CodeGen/X86/sandybridge-loads.ll | 39 + test/CodeGen/X86/select.ll | 4 +- test/CodeGen/X86/sext-load.ll | 25 +- test/CodeGen/X86/sibcall.ll | 4 +- 
test/CodeGen/X86/sincos-opt.ll | 66 + test/CodeGen/X86/sink-hoist.ll | 2 +- test/CodeGen/X86/sse-align-2.ll | 11 +- test/CodeGen/X86/sse-domains.ll | 4 +- test/CodeGen/X86/sse2-blend.ll | 24 +- test/CodeGen/X86/sse2-mul.ll | 14 + test/CodeGen/X86/stack-align-memcpy.ll | 18 + test/CodeGen/X86/stack-protector.ll | 3155 +- test/CodeGen/X86/stack-update-frame-opcode.ll | 31 + test/CodeGen/X86/store_op_load_fold.ll | 19 +- test/CodeGen/X86/subtarget-feature-change.ll | 66 + test/CodeGen/X86/tailcall-fastisel.ll | 5 +- test/CodeGen/X86/tailcall-structret.ll | 3 +- test/CodeGen/X86/tailcallbyval.ll | 7 +- test/CodeGen/X86/tailcallfp.ll | 3 +- test/CodeGen/X86/tailcallpic1.ll | 3 +- test/CodeGen/X86/tailcallpic2.ll | 5 +- test/CodeGen/X86/thiscall-struct-return.ll | 47 - test/CodeGen/X86/tls.ll | 40 +- test/CodeGen/X86/twoaddr-coalesce-2.ll | 1 + test/CodeGen/X86/twoaddr-pass-sink.ll | 1 + test/CodeGen/X86/unknown-location.ll | 18 +- test/CodeGen/X86/v8i1-masks.ll | 39 + test/CodeGen/X86/vec_align_i256.ll | 14 + test/CodeGen/X86/vec_compare.ll | 24 + test/CodeGen/X86/vec_floor.ll | 144 + test/CodeGen/X86/vec_fpext.ll | 4 +- test/CodeGen/X86/vec_insert-6.ll | 1 + test/CodeGen/X86/vec_sdiv_to_shift.ll | 72 + test/CodeGen/X86/vec_shuffle-19.ll | 1 + test/CodeGen/X86/vec_shuffle-20.ll | 3 +- test/CodeGen/X86/vec_splat-2.ll | 9 +- test/CodeGen/X86/vec_splat-3.ll | 219 +- test/CodeGen/X86/vec_splat-4.ll | 104 - test/CodeGen/X86/vec_splat.ll | 16 +- test/CodeGen/X86/vec_zero.ll | 2 +- test/CodeGen/X86/vector-gep.ll | 5 +- test/CodeGen/X86/vselect-minmax.ll | 2788 + test/CodeGen/X86/vsplit-and.ll | 2 +- test/CodeGen/X86/wide-fma-contraction.ll | 20 + test/CodeGen/X86/win32_sret.ll | 121 +- test/CodeGen/X86/win_ftol2.ll | 14 +- test/CodeGen/X86/x86-64-dead-stack-adjust.ll | 2 +- test/CodeGen/X86/x86-64-ptr-arg-simple.ll | 29 + test/CodeGen/X86/x86-64-sret-return.ll | 18 +- test/CodeGen/X86/xtest.ll | 11 + test/CodeGen/X86/zero-remat.ll | 1 + 
.../XCore/2010-04-07-DbgValueOtherTargets.ll | 28 - test/CodeGen/XCore/DbgValueOtherTargets.test | 1 + test/CodeGen/XCore/aliases.ll | 32 + test/CodeGen/XCore/lit.local.cfg | 2 +- .../2009-11-03-InsertExtractValue.ll | 17 +- .../2009-11-05-DeadGlobalVariable.ll | 10 +- test/DebugInfo/2009-11-10-CurrentFn.ll | 10 +- test/DebugInfo/2010-03-24-MemberFn.ll | 39 +- .../2010-03-30-InvalidDbgInfoCrash.ll | 2 +- test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll | 45 +- test/DebugInfo/2010-04-19-FramePtr.ll | 15 +- test/DebugInfo/2010-05-03-OriginDIE.ll | 2 +- test/DebugInfo/2010-05-10-MultipleCU.ll | 28 +- .../DebugInfo/2010-06-29-InlinedFnLocalVar.ll | 41 +- test/DebugInfo/2010-10-01-crash.ll | 1 + test/DebugInfo/AArch64/cfi-frame.ll | 58 + test/DebugInfo/AArch64/dwarfdump.ll | 34 + test/DebugInfo/AArch64/eh_frame.ll | 51 + .../DebugInfo/AArch64/eh_frame_personality.ll | 46 + test/DebugInfo/AArch64/lit.local.cfg | 6 + test/DebugInfo/AArch64/variable-loc.ll | 98 + test/DebugInfo/Inputs/dwarfdump-inl-test.cc | 15 + .../Inputs/dwarfdump-inl-test.elf-x86-64 | Bin 7468 -> 9024 bytes test/DebugInfo/Inputs/dwarfdump-inl-test.h | 9 + test/DebugInfo/Inputs/dwarfdump-pubnames.cc | 32 + .../Inputs/dwarfdump-pubnames.elf-x86-64 | Bin 0 -> 5280 bytes .../Inputs/dwarfdump-test-32bit.elf.c | 14 + .../Inputs/dwarfdump-test-32bit.elf.o | Bin 0 -> 2432 bytes test/DebugInfo/Inputs/dwarfdump-test.cc | 23 + .../Inputs/dwarfdump-test.elf-x86-64 | Bin 10174 -> 9640 bytes .../Inputs/dwarfdump-test2-helper.cc | 3 + test/DebugInfo/Inputs/dwarfdump-test2-main.cc | 11 + .../Inputs/dwarfdump-test2.elf-x86-64 | Bin 7702 -> 9160 bytes test/DebugInfo/Inputs/dwarfdump-test3-decl.h | 7 + test/DebugInfo/Inputs/dwarfdump-test3-decl2.h | 1 + test/DebugInfo/Inputs/dwarfdump-test3.cc | 12 + .../Inputs/dwarfdump-test3.elf-x86-64 | Bin 7339 -> 0 bytes .../Inputs/dwarfdump-test3.elf-x86-64 space | Bin 0 -> 8944 bytes test/DebugInfo/Inputs/dwarfdump-test4-decl.h | 1 + .../DebugInfo/Inputs/dwarfdump-test4-part1.cc | 8 + 
.../DebugInfo/Inputs/dwarfdump-test4-part2.cc | 2 + .../Inputs/dwarfdump-test4.elf-x86-64 | Bin 7689 -> 9368 bytes .../Inputs}/lit.local.cfg | 0 test/DebugInfo/Inputs/test-inline.o | Bin 0 -> 6040 bytes test/DebugInfo/Inputs/test-parameters.o | Bin 0 -> 5792 bytes test/DebugInfo/X86/2010-04-13-PubType.ll | 30 +- test/DebugInfo/X86/2010-08-10-DbgConstant.ll | 28 + .../X86/2011-09-26-GlobalVarContext.ll | 42 +- test/DebugInfo/X86/2011-12-16-BadStructRef.ll | 48 +- test/DebugInfo/X86/DW_AT_byte_size.ll | 25 +- .../DebugInfo/X86/DW_AT_location-reference.ll | 20 +- test/DebugInfo/X86/DW_AT_object_pointer.ll | 52 +- test/DebugInfo/X86/DW_AT_specification.ll | 34 +- test/DebugInfo/X86/DW_TAG_friend.ll | 31 +- test/DebugInfo/X86/aligned_stack_var.ll | 14 +- test/DebugInfo/X86/block-capture.ll | 71 +- test/DebugInfo/X86/concrete_out_of_line.ll | 81 +- .../X86/dbg-value-inlined-parameter.ll | 92 + .../X86/debug-info-block-captured-self.ll | 106 + test/DebugInfo/X86/debug-info-blocks.ll | 372 + .../DebugInfo/X86/debug-info-static-member.ll | 257 + test/DebugInfo/X86/debug_frame.ll | 11 +- test/DebugInfo/X86/elf-names.ll | 41 +- .../DebugInfo/X86/empty-and-one-elem-array.ll | 92 + test/DebugInfo/X86/empty-array.ll | 45 + test/DebugInfo/X86/ending-run.ll | 17 +- test/DebugInfo/X86/enum-class.ll | 32 +- test/DebugInfo/X86/enum-fwd-decl.ll | 14 +- test/DebugInfo/X86/fission-cu.ll | 100 + test/DebugInfo/X86/line-info.ll | 58 + test/DebugInfo/X86/linkage-name.ll | 30 +- test/DebugInfo/X86/lit.local.cfg | 2 +- test/DebugInfo/X86/low-pc-cu.ll | 14 +- test/DebugInfo/X86/main-file-name.s | 17 + test/DebugInfo/X86/misched-dbg-value.ll | 174 + test/DebugInfo/X86/multiple-at-const-val.ll | 61 + .../X86/nondefault-subrange-array.ll | 48 + test/DebugInfo/X86/objc-fwd-decl.ll | 19 +- test/DebugInfo/X86/op_deref.ll | 27 +- test/DebugInfo/X86/pointer-type-size.ll | 25 +- test/DebugInfo/X86/pr11300.ll | 33 +- test/DebugInfo/X86/pr12831.ll | 52 +- test/DebugInfo/X86/pr13303.ll | 26 + 
test/DebugInfo/X86/pr9951.ll | 13 +- test/DebugInfo/X86/prologue-stack.ll | 12 +- test/DebugInfo/X86/rvalue-ref.ll | 17 +- .../X86/stmt-list-multiple-compile-units.ll | 67 + test/DebugInfo/X86/stmt-list.ll | 11 +- test/DebugInfo/X86/stringpool.ll | 19 +- test/DebugInfo/X86/struct-loc.ll | 21 +- test/DebugInfo/X86/subrange-type.ll | 38 + test/DebugInfo/X86/subreg.ll | 17 +- test/DebugInfo/X86/union-template.ll | 58 + test/DebugInfo/X86/vector.ll | 28 + test/DebugInfo/array.ll | 22 +- test/DebugInfo/debuglineinfo.test | 49 + test/DebugInfo/dwarf-public-names.ll | 125 + .../dwarfdump-debug-frame-simple.test | 28 + test/DebugInfo/dwarfdump-dump-flags.test | 13 + test/DebugInfo/dwarfdump-inlining.test | 28 +- test/DebugInfo/dwarfdump-pubnames.test | 16 + test/DebugInfo/dwarfdump-test.test | 56 +- test/DebugInfo/inlined-vars.ll | 39 +- test/DebugInfo/llvm-symbolizer.test | 25 + test/DebugInfo/member-pointers.ll | 36 + test/DebugInfo/namespace.ll | 42 + test/DebugInfo/printdbginfo2.ll | 66 - test/DebugInfo/two-cus-from-same-file.ll | 71 + .../MCJIT/2002-12-16-ArgTest.ll | 2 +- .../MCJIT/2003-01-04-ArgumentBug.ll | 2 +- .../MCJIT/2003-01-04-LoopTest.ll | 2 +- .../MCJIT/2003-01-04-PhiTest.ll | 2 +- .../MCJIT/2003-01-09-SARTest.ll | 2 +- .../ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll | 2 +- .../MCJIT/2003-01-15-AlignmentTest.ll | 2 +- .../MCJIT/2003-05-06-LivenessClobber.ll | 2 +- .../MCJIT/2003-05-07-ArgumentTest.ll | 2 +- .../MCJIT/2003-05-11-PHIRegAllocBug.ll | 2 +- .../MCJIT/2003-06-04-bzip2-bug.ll | 2 +- .../MCJIT/2003-06-05-PHIBug.ll | 2 +- .../MCJIT/2003-08-15-AllocaAssertion.ll | 2 +- .../MCJIT/2003-08-21-EnvironmentTest.ll | 2 +- .../2003-08-23-RegisterAllocatePhysReg.ll | 2 +- ...8-PHINode-ConstantExpr-CondCode-Failure.ll | 2 +- .../MCJIT/2005-12-02-TailCallBug.ll | 2 +- .../MCJIT/2007-12-10-APIntLoadStore.ll | 2 +- .../MCJIT/2008-06-05-APInt-OverAShr.ll | 2 +- .../MCJIT/2010-01-15-UndefValue.ll | 2 +- test/ExecutionEngine/MCJIT/fpbitcast.ll | 2 +- 
test/ExecutionEngine/MCJIT/hello.ll | 2 +- test/ExecutionEngine/MCJIT/hello2.ll | 2 +- test/ExecutionEngine/MCJIT/pr13727.ll | 2 +- test/ExecutionEngine/MCJIT/simplesttest.ll | 2 +- .../MCJIT/simpletest-remote.ll | 12 + test/ExecutionEngine/MCJIT/simpletest.ll | 2 +- test/ExecutionEngine/MCJIT/stubs-remote.ll | 36 + test/ExecutionEngine/MCJIT/stubs.ll | 2 +- test/ExecutionEngine/MCJIT/test-arith.ll | 2 +- test/ExecutionEngine/MCJIT/test-branch.ll | 2 +- .../MCJIT/test-call-no-external-funcs.ll | 2 +- test/ExecutionEngine/MCJIT/test-call.ll | 2 +- test/ExecutionEngine/MCJIT/test-cast.ll | 2 +- .../MCJIT/test-common-symbols-alignment.ll | 2 +- .../MCJIT/test-common-symbols-remote.ll | 89 + .../MCJIT/test-common-symbols.ll | 2 +- .../MCJIT/test-constantexpr.ll | 2 +- .../MCJIT/test-data-align-remote.ll | 16 + test/ExecutionEngine/MCJIT/test-data-align.ll | 2 +- .../MCJIT/test-fp-no-external-funcs-remote.ll | 22 + .../MCJIT/test-fp-no-external-funcs.ll | 2 +- test/ExecutionEngine/MCJIT/test-fp.ll | 2 +- .../MCJIT/test-global-ctors.ll | 21 + .../MCJIT/test-global-init-nonzero-remote.ll | 35 + .../MCJIT/test-global-init-nonzero.ll | 2 +- test/ExecutionEngine/MCJIT/test-global.ll | 2 +- test/ExecutionEngine/MCJIT/test-loadstore.ll | 2 +- test/ExecutionEngine/MCJIT/test-local.ll | 2 +- test/ExecutionEngine/MCJIT/test-logical.ll | 2 +- test/ExecutionEngine/MCJIT/test-loop.ll | 2 +- test/ExecutionEngine/MCJIT/test-phi.ll | 2 +- .../MCJIT/test-ptr-reloc-remote.ll | 17 + test/ExecutionEngine/MCJIT/test-ptr-reloc.ll | 2 +- test/ExecutionEngine/MCJIT/test-ret.ll | 2 +- test/ExecutionEngine/MCJIT/test-return.ll | 2 +- test/ExecutionEngine/MCJIT/test-setcond-fp.ll | 2 +- .../ExecutionEngine/MCJIT/test-setcond-int.ll | 2 +- test/ExecutionEngine/MCJIT/test-shift.ll | 2 +- test/ExecutionEngine/lit.local.cfg | 4 +- .../test-interp-vec-loadstore.ll | 84 + test/Feature/attributes.ll | 15 + test/Feature/const_pv.ll | 2 +- test/Feature/global_pv.ll | 4 +- test/Feature/intrinsics.ll | 6 +- 
test/Feature/minsize_attr.ll | 3 +- test/Feature/properties.ll | 1 - test/FileCheck/dos-style-eol.txt | 11 + test/FileCheck/lit.local.cfg | 1 + test/FileCheck/next-no-match.txt | 9 + test/FileCheck/regex-brackets.txt | 7 + test/FileCheck/regex-no-match.txt | 5 + test/FileCheck/simple-var-capture.txt | 13 + test/FileCheck/two-checks-for-same-match.txt | 8 + test/FileCheck/var-ref-same-line.txt | 16 + .../AddressSanitizer/X86/bug_11395.ll | 4 +- .../adaptive_global_redzones.ll | 57 + .../AddressSanitizer/asan-vs-gvn.ll | 6 +- .../Instrumentation/AddressSanitizer/basic.ll | 61 +- .../AddressSanitizer/debug_info.ll | 61 + .../different_scale_and_offset.ll | 41 + .../do-not-instrument-internal-globals.ll | 7 +- .../AddressSanitizer/instrument-no-return.ll | 38 +- .../AddressSanitizer/instrument_global.ll | 2 +- .../instrument_initializer_metadata.ll | 49 +- .../instrument_load_then_store.ll | 2 +- .../AddressSanitizer/lifetime.ll | 84 + .../AddressSanitizer/test64.ll | 10 +- .../MemorySanitizer/lit.local.cfg | 1 + .../MemorySanitizer/msan_basic.ll | 625 + .../MemorySanitizer/unreachable.ll | 39 + .../Instrumentation/ThreadSanitizer/atomic.ll | 250 +- .../ThreadSanitizer/read_from_global.ll | 2 +- .../ThreadSanitizer/tsan-vs-gvn.ll | 26 + .../ThreadSanitizer/tsan_basic.ll | 33 + .../ThreadSanitizer/vptr_read.ll | 13 + test/Integer/properties_bt.ll | 2 - test/JitListener/lit.local.cfg | 11 + test/JitListener/test-common-symbols.ll | 113 + test/JitListener/test-inline.ll | 212 + test/JitListener/test-parameters.ll | 211 + test/Linker/2006-01-19-ConstantPacked.ll | 5 +- test/Linker/DbgDeclare.ll | 58 + test/Linker/DbgDeclare2.ll | 76 + test/Linker/module-flags-1-a.ll | 4 +- test/Linker/module-flags-3-a.ll | 8 +- test/Linker/module-flags-7-a.ll | 9 + test/Linker/module-flags-7-b.ll | 6 + test/Linker/module-flags-8-a.ll | 14 + test/Linker/module-flags-8-b.ll | 7 + test/Linker/testlink1.ll | 6 + test/Linker/testlink2.ll | 3 + test/MC/AArch64/basic-a64-diagnostics.s | 3713 + 
test/MC/AArch64/basic-a64-instructions.s | 4819 ++ test/MC/AArch64/elf-globaladdress.ll | 111 + test/MC/AArch64/elf-objdump.s | 5 + test/MC/AArch64/elf-reloc-addsubimm.s | 13 + test/MC/AArch64/elf-reloc-condbr.s | 13 + test/MC/AArch64/elf-reloc-ldrlit.s | 28 + test/MC/AArch64/elf-reloc-ldstunsimm.s | 34 + test/MC/AArch64/elf-reloc-movw.s | 98 + test/MC/AArch64/elf-reloc-pcreladdressing.s | 29 + test/MC/AArch64/elf-reloc-tstb.s | 18 + test/MC/AArch64/elf-reloc-uncondbrimm.s | 18 + test/MC/AArch64/gicv3-regs-diagnostics.s | 61 + test/MC/AArch64/gicv3-regs.s | 223 + test/MC/AArch64/lit.local.cfg | 5 + test/MC/AArch64/mapping-across-sections.s | 28 + test/MC/AArch64/mapping-within-section.s | 23 + test/MC/AArch64/tls-relocs.s | 662 + test/MC/AArch64/trace-regs-diagnostics.s | 156 + test/MC/AArch64/trace-regs.s | 766 + .../2013-03-18-Br-to-label-named-like-reg.s | 5 + .../MC/ARM/AlignedBundling/group-bundle-arm.s | 48 + test/MC/ARM/AlignedBundling/lit.local.cfg | 6 + .../AlignedBundling/pad-align-to-bundle-end.s | 41 + test/MC/ARM/arm_instructions.s | 13 +- test/MC/ARM/basic-arm-instructions.s | 47 + test/MC/ARM/basic-thumb2-instructions.s | 30 + test/MC/ARM/data-in-code.ll | 176 + test/MC/ARM/elf-eflags-eabi-cg.ll | 13 + test/MC/ARM/elf-reloc-01.ll | 4 +- test/MC/ARM/elf-reloc-02.ll | 6 +- test/MC/ARM/elf-reloc-03.ll | 6 +- test/MC/ARM/elf-reloc-condcall.s | 12 +- test/MC/ARM/elf-thumbfunc-reloc.ll | 4 +- test/MC/ARM/elf-thumbfunc.s | 2 +- test/MC/ARM/mapping-within-section.s | 33 + test/MC/ARM/multi-section-mapping.s | 35 + test/MC/ARM/neon-bitwise-encoding.s | 68 +- test/MC/ARM/neon-vld-encoding.s | 257 +- test/MC/ARM/neon-vst-encoding.s | 183 +- test/MC/ARM/neont2-vld-encoding.s | 96 +- test/MC/ARM/neont2-vst-encoding.s | 84 +- test/MC/ARM/relocated-mapping.s | 11 + test/MC/AsmParser/align_invalid.s | 10 + test/MC/AsmParser/directive_values.s | 6 + test/MC/AsmParser/section_names.s | 62 + test/MC/COFF/symbol-alias.s | 11 + 
.../COFF/weak-symbol-section-specification.ll | 23 + .../AArch64/a64-ignored-fields.txt | 8 + .../AArch64/basic-a64-instructions.txt | 4200 + .../AArch64/basic-a64-undefined.txt | 43 + .../AArch64/basic-a64-unpredictable.txt | 96 + test/MC/Disassembler/AArch64/gicv3-regs.txt | 222 + .../AArch64/ldp-offset-predictable.txt | 7 + .../AArch64/ldp-postind.predictable.txt | 17 + .../AArch64/ldp-preind.predictable.txt | 17 + test/MC/Disassembler/AArch64/lit.local.cfg | 6 + test/MC/Disassembler/AArch64/trace-regs.txt | 736 + test/MC/Disassembler/ARM/hex-immediates.txt | 5 + .../ARM/invalid-VST1d8Twb_register-thumb.txt | 2 +- test/MC/Disassembler/ARM/neon-tests.txt | 6 +- test/MC/Disassembler/ARM/neon.txt | 114 +- .../Disassembler/ARM/neont-VLD-reencoding.txt | 26 +- .../Disassembler/ARM/neont-VST-reencoding.txt | 26 +- test/MC/Disassembler/ARM/neont2.txt | 122 +- test/MC/Disassembler/ARM/thumb2.txt | 5 + .../MC/Disassembler/ARM/unpredictable-BFI.txt | 11 + test/MC/Disassembler/Mips/mips32.txt | 6 + test/MC/Disassembler/Mips/mips32_le.txt | 6 + test/MC/Disassembler/Mips/mips64.txt | 134 +- test/MC/Disassembler/Mips/mips64_le.txt | 134 +- test/MC/Disassembler/Mips/mips64r2.txt | 182 +- test/MC/Disassembler/Mips/mips64r2_le.txt | 182 +- test/MC/Disassembler/X86/enhanced.txt | 10 - test/MC/Disassembler/X86/hex-immediates.txt | 10 + test/MC/Disassembler/X86/intel-syntax-32.txt | 13 + test/MC/Disassembler/X86/simple-tests.txt | 25 +- test/MC/Disassembler/X86/x86-32.txt | 28 +- test/MC/Disassembler/X86/x86-64.txt | 73 +- test/MC/Disassembler/XCore/lit.local.cfg | 5 + test/MC/Disassembler/XCore/xcore.txt | 695 + test/MC/ELF/cfi-register.s | 42 + test/MC/ELF/cfi-undefined.s | 41 + test/MC/ELF/comp-dir.s | 7 + test/MC/ELF/gen-dwarf.s | 44 +- test/MC/ELF/many-sections-2.s | 65281 ++++++++++++++++ test/MC/ELF/no-fixup.s | 15 +- test/MC/ELF/relax-all-flag.s | 19 + test/MC/MachO/ARM/lit.local.cfg | 2 +- test/MC/MachO/ARM/nop-armv4-padding.s | 2 +- test/MC/MachO/bad-dollar.s | 5 + 
test/MC/MachO/bad-macro.s | 14 + test/MC/MachO/gen-dwarf-cpp.s | 2 +- test/MC/MachO/gen-dwarf-macro-cpp.s | 2 +- test/MC/MachO/gen-dwarf-producer.s | 8 + test/MC/MachO/gen-dwarf.s | 4 +- test/MC/MachO/linker-option-1.s | 21 + test/MC/MachO/linker-option-2.s | 25 + test/MC/MachO/linker-options.ll | 43 + test/MC/Mips/eh-frame.s | 167 + test/MC/Mips/elf-gprel-32-64.ll | 37 + test/MC/Mips/elf-reginfo.ll | 31 + test/MC/Mips/elf_eflags.ll | 66 + test/MC/Mips/elf_st_other.ll | 13 + test/MC/Mips/hilo-addressing.s | 11 + test/MC/Mips/mips-alu-instructions.s | 15 +- test/MC/Mips/mips-coprocessor-encodings.s | 3 +- test/MC/Mips/mips-expansions.s | 22 + test/MC/Mips/mips-jump-instructions.s | 48 +- test/MC/Mips/mips64-alu-instructions.s | 100 + test/MC/Mips/mips_directives.s | 35 +- test/MC/Mips/mips_gprel16.ll | 33 + test/MC/Mips/nabi-regs.s | 36 + test/MC/Mips/set-at-directive.s | 132 + test/MC/PowerPC/ppc64-initial-cfa.ll | 101 +- test/MC/PowerPC/ppc64-relocs-01.ll | 2 +- .../align-mode-argument-error.s | 8 + .../asm-printing-bundle-directives.s | 22 + .../autogen-inst-offset-align-to-end.s | 2899 + .../autogen-inst-offset-padding.s | 2674 + .../bundle-group-too-large-error.s | 17 + .../bundle-lock-option-error.s | 11 + .../X86/AlignedBundling/different-sections.s | 25 + test/MC/X86/AlignedBundling/lit.local.cfg | 6 + .../lock-without-bundle-mode-error.s | 10 + test/MC/X86/AlignedBundling/long-nop-pad.s | 27 + .../AlignedBundling/pad-align-to-bundle-end.s | 33 + .../X86/AlignedBundling/pad-bundle-groups.s | 46 + .../X86/AlignedBundling/relax-at-bundle-end.s | 16 + .../AlignedBundling/relax-in-bundle-group.s | 42 + .../AlignedBundling/single-inst-bundling.s | 47 + .../switch-section-locked-error.s | 16 + .../unlock-without-lock-error.s | 11 + test/MC/X86/fde-reloc.s | 11 + test/MC/X86/gnux32-dwarf-gen.s | 24 + test/MC/X86/intel-syntax-encoding.s | 21 + test/MC/X86/intel-syntax-hex.s | 26 + test/MC/X86/intel-syntax.s | 200 +- test/MC/X86/lit.local.cfg | 9 +- 
test/MC/X86/shuffle-comments.s | 271 + test/MC/X86/x86-32-avx.s | 60 +- test/MC/X86/x86-32-coverage.s | 120 +- test/MC/X86/x86-32-ms-inline-asm.s | 33 + test/MC/X86/x86-64.s | 10 +- test/MC/X86/x86_64-avx-encoding.s | 60 +- test/MC/X86/x86_64-fma4-encoding.s | 65 + test/MC/X86/x86_64-rand-encoding.s | 49 + test/MC/X86/x86_64-rtm-encoding.s | 4 + test/MC/X86/x86_errors.s | 2 +- test/MC/X86/x86_long_nop.s | 15 + test/Makefile | 16 +- test/Object/ARM/symbol-addr.ll | 12 + test/Object/Inputs/COFF/i386.yaml | 1 + test/Object/Inputs/coff_archive.lib | Bin 0 -> 41196 bytes test/Object/Inputs/liblong_filenames.a | Bin 0 -> 10920 bytes test/Object/Inputs/libsimple_archive.a | Bin 0 -> 1596 bytes .../Inputs/macho-text-sections.macho-x86_64 | Bin 0 -> 268 bytes test/Object/Inputs/program-headers.elf-i386 | Bin 0 -> 987 bytes test/Object/Inputs/program-headers.elf-x86-64 | Bin 0 -> 1108 bytes .../Inputs/trivial-object-test.elf-mips64el | Bin 0 -> 1064 bytes test/Object/Mips/feature.test | 4 +- test/Object/X86/macho-text-sections.test | 3 + test/Object/archive-long-index.test | 40 + test/Object/coff-archive.test | 225 + test/Object/obj2yaml.test | 170 + test/Object/objdump-private-headers.test | 18 + test/Object/objdump-relocations.test | 7 + test/Object/objdump-sectionheaders.test | 16 +- test/Object/readobj-elf-versioning.test | 49 +- test/Object/readobj-shared-object.test | 354 +- test/Object/readobj.test | 2 + test/Object/simple-archive.test | 12 + test/Object/yaml2obj-readobj.test | 5 + test/Other/2008-10-15-MissingSpace.ll | 8 +- test/Other/close-stderr.ll | 7 + test/Other/constant-fold-gep.ll | 136 +- test/Other/extract-linkonce.ll | 23 + test/Scripts/elf-dump | 48 +- test/TableGen/2006-09-18-LargeInt.td | 1 - test/TableGen/2010-03-24-PrematureDefaults.td | 1 - test/TableGen/Dag.td | 13 +- test/TableGen/DefmInherit.td | 1 - test/TableGen/DefmInsideMultiClass.td | 1 - test/TableGen/ForeachList.td | 1 - test/TableGen/ForeachLoop.td | 1 - test/TableGen/LazyChange.td | 1 - 
test/TableGen/LetInsideMultiClasses.td | 1 - test/TableGen/ListOfList.td | 1 - test/TableGen/LoLoL.td | 1 - test/TableGen/MultiClass.td | 1 - test/TableGen/MultiClassDefName.td | 1 - test/TableGen/MultiClassInherit.td | 1 - test/TableGen/MultiPat.td | 1 - test/TableGen/NestedForeach.td | 1 - test/TableGen/Paste.td | 1 - test/TableGen/SetTheory.td | 1 - test/TableGen/SiblingForeach.td | 1 - test/TableGen/Slice.td | 9 +- test/TableGen/TargetInstrSpec.td | 1 - test/TableGen/TwoLevelName.td | 1 - test/TableGen/cast.td | 1 - test/TableGen/defmclass.td | 1 - test/TableGen/eq.td | 1 - test/TableGen/eqbit.td | 1 - test/TableGen/foreach.td | 1 - test/TableGen/if.td | 1 - test/TableGen/ifbit.td | 1 - test/TableGen/lisp.td | 1 - test/TableGen/list-element-bitref.td | 7 +- test/TableGen/math.td | 18 + test/TableGen/pr8330.td | 1 - test/TableGen/strconcat.td | 1 - test/TableGen/subst.td | 1 - test/TableGen/subst2.td | 1 - test/TableGen/usevalname.td | 1 - .../2008-02-01-ReturnAttrs.ll | 18 +- test/Transforms/ArgumentPromotion/crash.ll | 8 +- test/Transforms/BBVectorize/X86/pr15289.ll | 98 + test/Transforms/BBVectorize/X86/simple-int.ll | 79 + test/Transforms/BBVectorize/cycle.ll | 2 +- test/Transforms/BBVectorize/ld1.ll | 2 +- test/Transforms/BBVectorize/loop1.ll | 4 +- test/Transforms/BBVectorize/req-depth.ll | 4 +- test/Transforms/BBVectorize/search-limit.ll | 4 +- test/Transforms/BBVectorize/simple-int.ll | 38 +- test/Transforms/BBVectorize/simple-ldstr.ll | 4 +- test/Transforms/BBVectorize/simple-sel.ll | 4 +- test/Transforms/BBVectorize/simple.ll | 2 +- test/Transforms/BBVectorize/simple3.ll | 2 +- test/Transforms/CodeGenPrepare/basic.ll | 2 +- test/Transforms/ConstProp/2007-11-23-cttz.ll | 2 +- .../2003-10-28-MergeExternalConstants.ll | 2 +- .../ConstantMerge/2011-01-15-EitherOrder.ll | 2 +- test/Transforms/ConstantMerge/merge-both.ll | 2 +- test/Transforms/ConstantMerge/unnamed-addr.ll | 2 +- .../CorrelatedValuePropagation/basic.ll | 22 +- 
.../DeadArgElim/2007-12-20-ParamAttrs.ll | 22 +- .../DeadArgElim/2010-04-30-DbgInfo.ll | 9 +- test/Transforms/DeadArgElim/dbginfo.ll | 21 +- test/Transforms/DeadArgElim/deadexternal.ll | 2 +- test/Transforms/DeadArgElim/keepalive.ll | 7 +- .../DeadStoreElimination/const-pointers.ll | 2 +- .../DeadStoreElimination/dominate.ll | 2 +- .../DeadStoreElimination/no-targetdata.ll | 2 +- .../DeadStoreElimination/pr11390.ll | 2 +- test/Transforms/EarlyCSE/commute.ll | 4 +- test/Transforms/EarlyCSE/floatingpoint.ll | 14 + .../FunctionAttrs/2008-09-03-ReadNone.ll | 8 +- .../FunctionAttrs/2008-09-03-ReadOnly.ll | 10 +- .../FunctionAttrs/2009-01-04-Annotate.ll | 21 + test/Transforms/FunctionAttrs/annotate-1.ll | 18 + test/Transforms/FunctionAttrs/atomic.ll | 6 +- test/Transforms/FunctionAttrs/noreturn.ll | 18 + test/Transforms/GCOVProfiling/linkagename.ll | 27 + test/Transforms/GCOVProfiling/lit.local.cfg | 1 + test/Transforms/GCOVProfiling/version.ll | 29 + test/Transforms/GVN/2011-04-27-phioperands.ll | 2 +- test/Transforms/GVN/MemdepMiscompile.ll | 54 + test/Transforms/GVN/crash-no-aa.ll | 5 +- test/Transforms/GVN/crash.ll | 2 +- test/Transforms/GVN/edge.ll | 2 +- test/Transforms/GVN/fpmath.ll | 2 +- test/Transforms/GVN/lpre-call-wrap-2.ll | 2 +- test/Transforms/GVN/lpre-call-wrap.ll | 2 +- test/Transforms/GVN/nonescaping-malloc.ll | 1 + test/Transforms/GVN/null-aliases-nothing.ll | 2 +- test/Transforms/GVN/pr12979.ll | 2 +- test/Transforms/GVN/range.ll | 2 +- test/Transforms/GVN/rle.ll | 13 +- test/Transforms/GVN/tbaa.ll | 2 +- test/Transforms/GlobalOpt/2009-03-05-dbg.ll | 1 + .../GlobalOpt/2010-02-25-MallocPromote.ll | 2 +- .../GlobalOpt/2010-02-26-MallocSROA.ll | 2 +- test/Transforms/GlobalOpt/crash-2.ll | 19 + test/Transforms/GlobalOpt/crash.ll | 2 +- .../GlobalOpt/ctor-list-opt-constexpr.ll | 2 +- .../externally-initialized-global-ctr.ll | 35 + test/Transforms/GlobalOpt/integer-bool.ll | 27 +- test/Transforms/GlobalOpt/memset-null.ll | 2 +- 
test/Transforms/GlobalOpt/unnamed-addr.ll | 2 +- .../IPConstantProp/user-with-multiple-uses.ll | 8 +- .../IndVarSimplify/2003-09-23-NotAtTop.ll | 2 +- test/Transforms/IndVarSimplify/crash.ll | 2 +- .../IndVarSimplify/dont-recompute.ll | 69 + test/Transforms/IndVarSimplify/iv-zext.ll | 2 +- .../phi-uses-value-multiple-times.ll | 1 + .../Inline/2003-09-22-PHINodeInlineFail.ll | 10 +- .../2003-09-22-PHINodesInNormalInvokeDest.ll | 4 +- .../Inline/2006-11-09-InlineCGUpdate-2.ll | 1 - .../Inline/2006-11-09-InlineCGUpdate.ll | 1 - test/Transforms/Inline/2010-05-12-ValueMap.ll | 2 +- test/Transforms/Inline/alloca_test.ll | 2 +- test/Transforms/Inline/basictest.ll | 45 + test/Transforms/Inline/crash2.ll | 2 +- test/Transforms/Inline/delete-call.ll | 8 +- test/Transforms/Inline/devirtualize-3.ll | 2 +- test/Transforms/Inline/devirtualize.ll | 2 +- .../Transforms/Inline/gvn-inline-iteration.ll | 2 +- test/Transforms/Inline/inline-optsize.ll | 4 +- test/Transforms/Inline/inline_constprop.ll | 76 + test/Transforms/Inline/inline_invoke.ll | 12 +- test/Transforms/Inline/inline_minisize.ll | 232 + test/Transforms/Inline/inline_ssp.ll | 160 + .../Inline/lifetime-no-datalayout.ll | 23 + test/Transforms/Inline/lifetime.ll | 46 +- .../Inline/noinline-recursive-fn.ll | 2 +- test/Transforms/Inline/noinline.ll | 2 +- test/Transforms/Inline/recursive.ll | 2 +- .../InstCombine/2008-05-08-StrLenSink.ll | 2 +- .../InstCombine/2009-02-11-NotInitialized.ll | 14 + .../InstCombine/2010-03-03-ExtElim.ll | 2 +- .../2010-05-30-memcpy-Struct.ll | 6 +- .../InstCombine/2010-11-01-lshr-mask.ll | 4 +- .../InstCombine/2012-04-23-Neon-Intrinsics.ll | 6 +- .../InstCombine/2012-12-14-simp-vgep.ll | 10 + ...013-03-05-Combine-BitcastTy-Into-Alloca.ll | 45 + test/Transforms/InstCombine/abs-1.ll | 41 + test/Transforms/InstCombine/align-external.ll | 2 +- .../InstCombine/bitcast-bigendian.ll | 50 + .../InstCombine/bitcast-vector-fold.ll | 5 + test/Transforms/InstCombine/bitcast.ll | 21 +- 
test/Transforms/InstCombine/cast.ll | 14 +- test/Transforms/InstCombine/compare-signs.ll | 2 +- .../InstCombine/constant-expr-datalayout.ll | 12 + test/Transforms/InstCombine/cos-1.ll | 38 + test/Transforms/InstCombine/cos-2.ll | 17 + .../debug-line.ll | 2 +- test/Transforms/InstCombine/debuginfo.ll | 32 +- test/Transforms/InstCombine/devirt.ll | 2 +- .../InstCombine/disable-simplify-libcalls.ll | 99 + .../double-float-shrink-1.ll} | 262 +- .../InstCombine/double-float-shrink-2.ll | 80 + test/Transforms/InstCombine/exact.ll | 7 +- test/Transforms/InstCombine/exp2-1.ll | 76 + test/Transforms/InstCombine/exp2-2.ll | 17 + test/Transforms/InstCombine/fast-math.ll | 467 + test/Transforms/InstCombine/ffs-1.ll | 134 + test/Transforms/InstCombine/fmul.ll | 72 + test/Transforms/InstCombine/fold-phi.ll | 39 + test/Transforms/InstCombine/fpcast.ll | 19 + test/Transforms/InstCombine/fprintf-1.ll | 80 + test/Transforms/InstCombine/fputs-1.ll | 43 + test/Transforms/InstCombine/fwrite-1.ll | 57 + test/Transforms/InstCombine/getelementptr.ll | 20 +- test/Transforms/InstCombine/icmp.ll | 209 + test/Transforms/InstCombine/idioms.ll | 2 +- test/Transforms/InstCombine/intrinsics.ll | 38 +- test/Transforms/InstCombine/isascii-1.ll | 32 + test/Transforms/InstCombine/isdigit-1.ll | 48 + test/Transforms/InstCombine/load-cmp.ll | 12 + test/Transforms/InstCombine/load3.ll | 25 +- test/Transforms/InstCombine/logical-select.ll | 20 +- .../InstCombine/malloc-free-delete.ll | 29 + test/Transforms/InstCombine/memcmp-1.ll | 4 +- .../InstCombine/memcpy-from-global.ll | 10 + test/Transforms/InstCombine/mul.ll | 8 +- .../InstCombine/obfuscated_splat.ll | 2 +- test/Transforms/InstCombine/objsize.ll | 128 + test/Transforms/InstCombine/or.ll | 5 +- .../osx-names.ll | 2 +- test/Transforms/InstCombine/pow-1.ll | 154 + test/Transforms/InstCombine/pow-2.ll | 14 + test/Transforms/InstCombine/pr12338.ll | 42 +- test/Transforms/InstCombine/printf-1.ll | 119 + test/Transforms/InstCombine/printf-2.ll | 41 + 
test/Transforms/InstCombine/ptr-int-cast.ll | 31 + test/Transforms/InstCombine/puts-1.ll | 31 + test/Transforms/InstCombine/sdiv-1.ll | 4 +- test/Transforms/InstCombine/sext.ll | 9 + test/Transforms/InstCombine/shift.ll | 90 +- test/Transforms/InstCombine/signext.ll | 4 +- .../InstCombine/sink_instruction.ll | 2 +- test/Transforms/InstCombine/sprintf-1.ll | 100 + test/Transforms/InstCombine/sqrt.ll | 2 +- test/Transforms/InstCombine/store.ll | 34 + test/Transforms/InstCombine/strto-1.ll | 16 +- test/Transforms/InstCombine/toascii-1.ll | 59 + .../Transforms/InstCombine/vec_extract_elt.ll | 10 + test/Transforms/InstCombine/vector-casts.ll | 3 +- test/Transforms/InstCombine/vector-type.ll | 15 + test/Transforms/InstCombine/vector_gep1.ll | 9 +- test/Transforms/InstCombine/xor2.ll | 31 + .../InstCombine/zext-bool-add-sub.ll | 4 +- test/Transforms/InstSimplify/call-callconv.ll | 48 + test/Transforms/InstSimplify/call.ll | 103 + test/Transforms/InstSimplify/compare.ll | 86 + test/Transforms/InstSimplify/fast-math.ll | 107 + .../InstSimplify/floating-point-arithmetic.ll | 35 + test/Transforms/InstSimplify/past-the-end.ll | 77 + test/Transforms/InstSimplify/ptr_diff.ll | 30 + test/Transforms/InstSimplify/vector_gep.ll | 2 +- test/Transforms/JumpThreading/basic.ll | 40 +- .../JumpThreading/degenerate-phi.ll | 2 +- test/Transforms/JumpThreading/or-undef.ll | 2 +- .../LICM/2003-12-11-SinkingToPHI.ll | 2 +- test/Transforms/LICM/2011-07-06-Alignment.ll | 2 +- test/Transforms/LICM/crash.ll | 2 +- test/Transforms/LICM/hoist-invariant-load.ll | 1 + test/Transforms/LICM/hoisting.ll | 26 + test/Transforms/LICM/scalar_promote.ll | 127 +- .../LoopDeletion/2011-06-21-phioperands.ll | 2 +- .../LoopDeletion/simplify-then-delete.ll | 4 +- test/Transforms/LoopIdiom/X86/lit.local.cfg | 6 + test/Transforms/LoopIdiom/X86/popcnt.ll | 140 + test/Transforms/LoopRotate/basic.ll | 28 +- test/Transforms/LoopRotate/crash.ll | 2 +- test/Transforms/LoopRotate/dbgvalue.ll | 2 +- 
test/Transforms/LoopRotate/phi-duplicate.ll | 2 +- .../2012-07-18-LimitReassociate.ll | 14 +- .../LoopStrengthReduce/2013-01-05-IndBr.ll | 44 + .../2013-01-14-ReuseCast.ll | 84 + .../LoopStrengthReduce/ARM/ivchain-ARM.ll | 101 +- .../{ => X86}/2008-08-14-ShadowIV.ll | 2 +- .../{ => X86}/2011-07-20-DoubleIV.ll | 2 +- .../X86/2011-12-04-loserreg.ll | 13 +- .../LoopStrengthReduce/dominate-assert.ll | 2 +- .../exit_compare_live_range.ll | 2 +- .../LoopStrengthReduce/post-inc-icmpzero.ll | 9 +- test/Transforms/LoopUnroll/basic.ll | 23 + test/Transforms/LoopUnroll/runtime-loop3.ll | 1 + .../LoopUnswitch/2008-11-03-Invariant.ll | 1 + .../LoopUnswitch/2011-11-18-SimpleSwitch.ll | 10 +- .../2011-11-18-TwoSwitches-Threshold.ll | 10 +- .../LoopUnswitch/2011-11-18-TwoSwitches.ll | 12 +- test/Transforms/LoopUnswitch/basictest.ll | 39 +- test/Transforms/LoopUnswitch/infinite-loop.ll | 9 +- .../LoopUnswitch/preserve-analyses.ll | 2 +- .../LoopVectorize/12-12-11-if-conv.ll | 44 + .../LoopVectorize/2012-10-20-infloop.ll | 46 +- .../LoopVectorize/2012-10-22-isconsec.ll | 2 +- .../LoopVectorize/ARM/arm-unroll.ll | 32 + .../LoopVectorize/ARM/gcc-examples.ll | 60 + .../LoopVectorize/ARM/lit.local.cfg | 6 + .../LoopVectorize/ARM/mul-cast-vect.ll | 114 + .../LoopVectorize/ARM/width-detect.ll | 52 + test/Transforms/LoopVectorize/X86/avx1.ll | 4 +- .../X86/constant-vector-operand.ll | 28 + .../LoopVectorize/X86/conversion-cost.ll | 11 +- .../LoopVectorize/X86/cost-model.ll | 5 +- .../LoopVectorize/X86/gcc-examples.ll | 27 +- .../X86/min-trip-count-switch.ll | 28 + .../Transforms/LoopVectorize/X86/no-vector.ll | 22 + .../X86/parallel-loops-after-reg2mem.ll | 52 + .../LoopVectorize/X86/parallel-loops.ll | 114 + .../LoopVectorize/X86/reduction-crash.ll | 35 + .../LoopVectorize/X86/small-size.ll | 170 + .../LoopVectorize/X86/struct-store.ll | 27 + .../LoopVectorize/X86/unroll-small-loops.ll | 50 + .../LoopVectorize/X86/unroll_selection.ll | 71 + .../X86/vector-scalar-select-cost.ll | 66 + 
.../X86/vector_ptr_load_store.ll | 150 + .../LoopVectorize/bzip_reverse_loops.ll | 71 + test/Transforms/LoopVectorize/calloc.ll | 53 + .../LoopVectorize/cast-induction.ll | 30 + .../Transforms/LoopVectorize/cpp-new-array.ll | 4 +- test/Transforms/LoopVectorize/dbg.value.ll | 70 + test/Transforms/LoopVectorize/flags.ll | 2 +- .../LoopVectorize/float-reduction.ll | 29 + test/Transforms/LoopVectorize/gcc-examples.ll | 57 +- test/Transforms/LoopVectorize/global_alias.ll | 1078 + test/Transforms/LoopVectorize/i8-induction.ll | 35 + .../Transforms/LoopVectorize/if-conv-crash.ll | 39 + .../LoopVectorize/if-conversion-reduction.ll | 38 + .../Transforms/LoopVectorize/if-conversion.ll | 108 + test/Transforms/LoopVectorize/increment.ll | 2 +- .../LoopVectorize/induction_plus.ll | 5 +- test/Transforms/LoopVectorize/intrinsic.ll | 935 + test/Transforms/LoopVectorize/lcssa-crash.ll | 29 + .../LoopVectorize/no_int_induction.ll | 33 + test/Transforms/LoopVectorize/nofloat.ll | 29 + test/Transforms/LoopVectorize/non-const-n.ll | 2 +- test/Transforms/LoopVectorize/nsw-crash.ll | 25 + test/Transforms/LoopVectorize/phi-hang.ll | 29 + test/Transforms/LoopVectorize/ptr_loops.ll | 74 + test/Transforms/LoopVectorize/read-only.ll | 2 +- test/Transforms/LoopVectorize/reduction.ll | 95 +- .../Transforms/LoopVectorize/runtime-check.ll | 6 +- .../LoopVectorize/same-base-access.ll | 110 + .../Transforms/LoopVectorize/scalar-select.ll | 2 +- .../Transforms/LoopVectorize/simple-unroll.ll | 39 + test/Transforms/LoopVectorize/small-loop.ll | 2 +- .../LoopVectorize/start-non-zero.ll | 2 +- .../Transforms/LoopVectorize/struct_access.ll | 50 + .../LoopVectorize/vectorize-once.ll | 75 + test/Transforms/LoopVectorize/write-only.ll | 2 +- test/Transforms/Mem2Reg/ConvertDebugInfo.ll | 32 +- test/Transforms/Mem2Reg/ConvertDebugInfo2.ll | 27 +- test/Transforms/MemCpyOpt/memcpy.ll | 22 +- .../MergeFunc/2011-02-08-RemoveEqual.ll | 2 +- .../MergeFunc/2013-01-10-MergeFuncAssert.ll | 36 + 
test/Transforms/MergeFunc/phi-speculation1.ll | 1 + test/Transforms/MergeFunc/phi-speculation2.ll | 1 + test/Transforms/MergeFunc/vector.ll | 1 + .../MergeFunc/vectors-and-arrays.ll | 1 + test/Transforms/MetaRenamer/metarenamer.ll | 2 +- test/Transforms/ObjCARC/apelim.ll | 6 +- test/Transforms/ObjCARC/arc-annotations.ll | 307 + test/Transforms/ObjCARC/basic.ll | 55 +- test/Transforms/ObjCARC/cfg-hazards.ll | 42 +- ...-arc-used-intrinsic-removed-if-isolated.ll | 16 + test/Transforms/ObjCARC/contract-marker.ll | 4 +- .../ObjCARC/contract-storestrong.ll | 30 +- test/Transforms/ObjCARC/contract-testcases.ll | 5 +- test/Transforms/ObjCARC/contract.ll | 37 +- ...e-that-exception-unwind-path-is-visited.ll | 174 + test/Transforms/ObjCARC/escape.ll | 7 +- test/Transforms/ObjCARC/gvn.ll | 2 +- test/Transforms/ObjCARC/intrinsic-use.ll | 63 + test/Transforms/ObjCARC/invoke.ll | 16 +- .../move-and-form-retain-autorelease.ll | 6 +- test/Transforms/ObjCARC/nested.ll | 9 +- .../ObjCARC/no-objc-arc-exceptions.ll | 7 +- test/Transforms/ObjCARC/pr12270.ll | 2 +- .../Transforms/ObjCARC/retain-block-alloca.ll | 6 +- .../ObjCARC/retain-block-escape-analysis.ll | 127 + .../ObjCARC/retain-block-side-effects.ll | 5 +- test/Transforms/ObjCARC/retain-block.ll | 26 +- .../Transforms/ObjCARC/retain-not-declared.ll | 8 +- test/Transforms/ObjCARC/rle-s2l.ll | 7 +- test/Transforms/ObjCARC/rv.ll | 32 +- test/Transforms/ObjCARC/split-backedge.ll | 14 +- .../tail-call-invariant-enforcement.ll | 74 + test/Transforms/ObjCARC/weak-copies.ll | 6 +- .../2010-03-22-empty-baseclass.ll | 2 +- test/Transforms/PhaseOrdering/PR6627.ll | 2 +- test/Transforms/PhaseOrdering/basic.ll | 2 +- test/Transforms/PhaseOrdering/gdce.ll | 2 +- test/Transforms/PhaseOrdering/scev.ll | 2 +- test/Transforms/Reassociate/crash.ll | 2 +- test/Transforms/Reassociate/xor_reassoc.ll | 166 + test/Transforms/Reg2Mem/crash.ll | 88 + test/Transforms/Reg2Mem/lit.local.cfg | 1 + test/Transforms/SCCP/crash.ll | 2 +- 
test/Transforms/SCCP/ipsccp-addr-taken.ll | 2 +- test/Transforms/SCCP/retvalue-undef.ll | 2 +- test/Transforms/SCCP/undef-resolve.ll | 2 +- test/Transforms/SROA/basictest.ll | 97 +- test/Transforms/SROA/big-endian.ll | 16 +- test/Transforms/SROA/phi-and-select.ll | 12 +- test/Transforms/SROA/vector-promotion.ll | 164 +- test/Transforms/SROA/vectors-of-pointers.ll | 25 + .../ScalarRepl/2003-09-12-IncorrectPromote.ll | 5 +- test/Transforms/ScalarRepl/crash.ll | 4 +- .../ScalarRepl/debuginfo-preserved.ll | 21 +- test/Transforms/ScalarRepl/memcpy-align.ll | 2 +- test/Transforms/ScalarRepl/phi-cycle.ll | 5 +- test/Transforms/ScalarRepl/phi-select.ll | 2 +- test/Transforms/ScalarRepl/volatile.ll | 5 +- .../SimplifyCFG/2010-03-30-InvokeCrash.ll | 2 +- ...-EmptyBlockMerge.ll => EmptyBlockMerge.ll} | 5 +- .../{2002-06-24-PHINode.ll => PHINode.ll} | 3 +- test/Transforms/SimplifyCFG/PR9946.ll | 2 +- .../Transforms/SimplifyCFG/SpeculativeExec.ll | 41 + .../SimplifyCFG/X86/switch_to_lookup_table.ll | 26 + .../Transforms/SimplifyCFG/branch-fold-dbg.ll | 2 +- test/Transforms/SimplifyCFG/select-gep.ll | 2 +- .../SimplifyCFG/switch-on-const-select.ll | 9 +- test/Transforms/SimplifyCFG/trivial-throw.ll | 77 + .../SimplifyCFG/volatile-phioper.ll | 48 + .../SimplifyLibCalls/2009-01-04-Annotate.ll | 12 - .../2009-02-11-NotInitialized.ll | 13 - test/Transforms/SimplifyLibCalls/FFS.ll | 45 - test/Transforms/SimplifyLibCalls/FPrintF.ll | 28 - test/Transforms/SimplifyLibCalls/FPuts.ll | 29 - test/Transforms/SimplifyLibCalls/IsDigit.ll | 21 - test/Transforms/SimplifyLibCalls/Printf.ll | 37 - test/Transforms/SimplifyLibCalls/Puts.ll | 15 - test/Transforms/SimplifyLibCalls/SPrintF.ll | 40 - test/Transforms/SimplifyLibCalls/ToAscii.ll | 21 - test/Transforms/SimplifyLibCalls/abs.ll | 11 - test/Transforms/SimplifyLibCalls/cos.ll | 14 - test/Transforms/SimplifyLibCalls/exp2.ll | 38 - .../SimplifyLibCalls/float-shrink-compare.ll | 2 +- test/Transforms/SimplifyLibCalls/floor.ll | 85 - 
test/Transforms/SimplifyLibCalls/fwrite.ll | 13 - test/Transforms/SimplifyLibCalls/iprintf.ll | 71 - .../SimplifyLibCalls/pow-to-sqrt.ll | 33 - test/Transforms/SimplifyLibCalls/pow2.ll | 37 - .../StripSymbols/2010-08-25-crash.ll | 2 +- test/Transforms/StripSymbols/block-address.ll | 2 +- test/Transforms/TailCallElim/ackermann.ll | 1 + .../TailCallElim/dont-tce-tail-marked-call.ll | 4 +- test/Transforms/TailCallElim/dup_tail.ll | 5 +- .../TailCallElim/intervening-inst.ll | 3 +- .../TailCallElim/move_alloca_for_tail_call.ll | 2 +- test/Transforms/TailCallElim/nocapture.ll | 2 +- test/Transforms/TailCallElim/reorder_load.ll | 6 +- .../TailCallElim/return_constant.ll | 3 +- .../TailCallElim/trivial_codegen_tailcall.ll | 6 +- .../TailDup/2008-06-11-AvoidDupLoopHeader.ll | 1 + test/Unit/lit.cfg | 5 + test/Verifier/module-flags-1.ll | 60 + test/lit.cfg | 30 +- test/lit.site.cfg.in | 3 + test/tools/llvm-lit/chain.c | 9 + test/tools/llvm-lit/lit.local.cfg | 1 + .../tools/llvm-objdump/disassembly-show-raw.s | 15 + test/tools/llvm-objdump/lit.local.cfg | 6 + test/tools/llvm-objdump/win64-unwind-data.s | 106 + test/tools/llvm-readobj/Inputs/trivial.ll | 19 + .../llvm-readobj/Inputs/trivial.obj.coff-i386 | Bin 0 -> 314 bytes .../Inputs/trivial.obj.coff-x86-64 | Bin 0 -> 319 bytes .../llvm-readobj/Inputs/trivial.obj.elf-i386 | Bin 0 -> 896 bytes .../Inputs/trivial.obj.elf-x86-64 | Bin 0 -> 1256 bytes .../Inputs/trivial.obj.macho-i386 | Bin 0 -> 472 bytes .../Inputs/trivial.obj.macho-x86-64 | Bin 0 -> 532 bytes test/tools/llvm-readobj/file-headers.test | 100 + test/tools/llvm-readobj/lit.local.cfg | 1 + test/tools/llvm-readobj/relocations.test | 32 + test/tools/llvm-readobj/sections-ext.test | 175 + test/tools/llvm-readobj/sections.test | 113 + test/tools/llvm-readobj/symbols.test | 44 + tools/CMakeLists.txt | 11 +- tools/LLVMBuild.txt | 2 +- tools/Makefile | 8 +- tools/bugpoint-passes/CMakeLists.txt | 4 + tools/bugpoint-passes/TestPasses.cpp | 10 +- tools/bugpoint/BugDriver.cpp 
| 6 +- tools/bugpoint/BugDriver.h | 2 +- tools/bugpoint/CMakeLists.txt | 3 +- tools/bugpoint/CrashDebugger.cpp | 20 +- tools/bugpoint/ExecutionDriver.cpp | 2 +- tools/bugpoint/ExtractFunction.cpp | 26 +- tools/bugpoint/LLVMBuild.txt | 2 +- tools/bugpoint/ListReducer.h | 6 +- tools/bugpoint/Makefile | 2 +- tools/bugpoint/Miscompilation.cpp | 16 +- tools/bugpoint/OptimizerDriver.cpp | 12 +- tools/bugpoint/ToolRunner.cpp | 6 +- tools/bugpoint/ToolRunner.h | 2 +- tools/bugpoint/bugpoint.cpp | 7 +- tools/gold/gold-plugin.cpp | 9 +- tools/llc/CMakeLists.txt | 2 +- tools/llc/LLVMBuild.txt | 2 +- tools/llc/Makefile | 2 +- tools/llc/llc.cpp | 39 +- tools/lli/CMakeLists.txt | 4 +- tools/lli/LLVMBuild.txt | 2 +- tools/lli/Makefile | 4 +- tools/lli/RecordingMemoryManager.cpp | 63 +- tools/lli/RecordingMemoryManager.h | 13 +- tools/lli/RemoteTarget.h | 2 +- tools/lli/lli.cpp | 250 +- tools/llvm-ar/llvm-ar.cpp | 10 +- tools/llvm-as/llvm-as.cpp | 8 +- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 162 +- tools/llvm-diff/CMakeLists.txt | 2 +- tools/llvm-diff/DiffConsumer.cpp | 5 +- tools/llvm-diff/DiffConsumer.h | 5 +- tools/llvm-diff/DiffLog.cpp | 3 +- tools/llvm-diff/DifferenceEngine.cpp | 12 +- tools/llvm-diff/DifferenceEngine.h | 5 +- tools/llvm-diff/LLVMBuild.txt | 2 +- tools/llvm-diff/Makefile | 2 +- tools/llvm-diff/llvm-diff.cpp | 12 +- tools/llvm-dis/llvm-dis.cpp | 14 +- tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 126 +- tools/llvm-extract/CMakeLists.txt | 2 +- tools/llvm-extract/LLVMBuild.txt | 2 +- tools/llvm-extract/Makefile | 2 +- tools/llvm-extract/llvm-extract.cpp | 23 +- tools/llvm-jitlistener/CMakeLists.txt | 22 + tools/llvm-jitlistener/LLVMBuild.txt | 22 + tools/llvm-jitlistener/Makefile | 27 + tools/llvm-jitlistener/llvm-jitlistener.cpp | 207 + tools/llvm-link/CMakeLists.txt | 2 +- tools/llvm-link/LLVMBuild.txt | 2 +- tools/llvm-link/Makefile | 2 +- tools/llvm-link/llvm-link.cpp | 18 +- tools/llvm-mc/Disassembler.cpp | 180 +- tools/llvm-mc/Disassembler.h | 5 - 
tools/llvm-mc/llvm-mc.cpp | 80 +- tools/llvm-nm/llvm-nm.cpp | 49 +- tools/llvm-objdump/CMakeLists.txt | 2 + tools/llvm-objdump/COFFDump.cpp | 355 + tools/llvm-objdump/ELFDump.cpp | 100 + tools/llvm-objdump/MachODump.cpp | 59 +- tools/llvm-objdump/llvm-objdump.cpp | 97 +- tools/llvm-objdump/llvm-objdump.h | 11 + tools/llvm-prof/llvm-prof.cpp | 16 +- tools/llvm-ranlib/llvm-ranlib.cpp | 8 +- tools/llvm-readobj/CMakeLists.txt | 12 +- tools/llvm-readobj/COFFDumper.cpp | 1014 + tools/llvm-readobj/ELFDumper.cpp | 800 + tools/llvm-readobj/Error.cpp | 62 + tools/llvm-readobj/Error.h | 48 + tools/llvm-readobj/LLVMBuild.txt | 2 +- tools/llvm-readobj/MachODumper.cpp | 438 + tools/llvm-readobj/Makefile | 2 +- tools/llvm-readobj/ObjDumper.cpp | 33 + tools/llvm-readobj/ObjDumper.h | 60 + tools/llvm-readobj/StreamWriter.cpp | 79 + tools/llvm-readobj/StreamWriter.h | 282 + tools/llvm-readobj/llvm-readobj.cpp | 407 +- tools/llvm-readobj/llvm-readobj.h | 45 + tools/llvm-rtdyld/CMakeLists.txt | 2 +- tools/llvm-rtdyld/Makefile | 2 +- tools/llvm-rtdyld/llvm-rtdyld.cpp | 81 +- tools/llvm-size/llvm-size.cpp | 2 +- tools/llvm-stress/Makefile | 2 +- tools/llvm-stress/llvm-stress.cpp | 48 +- tools/llvm-symbolizer/CMakeLists.txt | 14 + tools/llvm-symbolizer/LLVMSymbolize.cpp | 292 + tools/llvm-symbolizer/LLVMSymbolize.h | 98 + tools/llvm-symbolizer/Makefile | 17 + tools/llvm-symbolizer/llvm-symbolizer.cpp | 119 + tools/lto/CMakeLists.txt | 3 + tools/lto/LTOCodeGenerator.cpp | 66 +- tools/lto/LTOCodeGenerator.h | 6 +- tools/lto/LTODisassembler.cpp | 26 + tools/lto/LTOModule.cpp | 61 +- tools/lto/LTOModule.h | 28 +- tools/lto/Makefile | 2 +- tools/lto/lto.cpp | 5 +- tools/lto/lto.exports | 2 + tools/macho-dump/macho-dump.cpp | 30 +- {utils => tools}/obj2yaml/CMakeLists.txt | 0 {utils => tools}/obj2yaml/Makefile | 2 +- {utils => tools}/obj2yaml/coff2yaml.cpp | 1 - {utils => tools}/obj2yaml/obj2yaml.cpp | 7 +- {utils => tools}/obj2yaml/obj2yaml.h | 5 +- tools/opt/AnalysisWrappers.cpp | 4 +- 
tools/opt/CMakeLists.txt | 3 +- tools/opt/GraphPrinters.cpp | 75 +- tools/opt/LLVMBuild.txt | 2 +- tools/opt/Makefile | 2 +- tools/opt/PrintSCC.cpp | 6 +- tools/opt/opt.cpp | 71 +- unittests/ADT/APFloatTest.cpp | 86 +- unittests/ADT/APIntTest.cpp | 22 +- unittests/ADT/CMakeLists.txt | 3 + unittests/ADT/MapVectorTest.cpp | 55 + unittests/ADT/OptionalTest.cpp | 284 + unittests/ADT/SCCIteratorTest.cpp | 4 +- unittests/ADT/SmallPtrSetTest.cpp | 55 + unittests/ADT/SmallStringTest.cpp | 4 +- unittests/ADT/SmallVectorTest.cpp | 4 +- unittests/ADT/SparseMultiSetTest.cpp | 235 + unittests/ADT/StringRefTest.cpp | 2 +- unittests/ADT/TinyPtrVectorTest.cpp | 42 +- unittests/ADT/TripleTest.cpp | 30 + unittests/ADT/TwineTest.cpp | 2 +- unittests/ADT/ilistTest.cpp | 58 +- unittests/Analysis/ScalarEvolutionTest.cpp | 12 +- unittests/Bitcode/BitReaderTest.cpp | 14 +- unittests/CMakeLists.txt | 3 +- unittests/ExecutionEngine/CMakeLists.txt | 8 +- .../ExecutionEngine/ExecutionEngineTest.cpp | 10 +- unittests/ExecutionEngine/JIT/CMakeLists.txt | 3 + .../JIT/JITEventListenerTest.cpp | 9 +- .../JIT/JITEventListenerTestCommon.h | 16 +- .../JIT/JITMemoryManagerTest.cpp | 12 +- unittests/ExecutionEngine/JIT/JITTest.cpp | 85 +- unittests/ExecutionEngine/JIT/Makefile | 2 +- .../ExecutionEngine/JIT/MultiJITTest.cpp | 8 +- .../JIT/OProfileJITEventListenerTest.cpp | 5 +- .../ExecutionEngine/MCJIT/CMakeLists.txt | 4 +- .../MCJIT/MCJITMemoryManagerTest.cpp | 172 + unittests/ExecutionEngine/MCJIT/MCJITTest.cpp | 6 +- .../ExecutionEngine/MCJIT/MCJITTestBase.h | 17 +- .../MCJIT/SectionMemoryManager.cpp | 143 - .../MCJIT/SectionMemoryManager.h | 118 - unittests/ExecutionEngine/Makefile | 5 +- unittests/IR/AttributesTest.cpp | 34 + unittests/{VMCore => IR}/CMakeLists.txt | 10 +- unittests/IR/ConstantsTest.cpp | 260 + .../{VMCore => IR}/DominatorTreeTest.cpp | 19 +- unittests/{VMCore => IR}/IRBuilderTest.cpp | 94 +- unittests/{VMCore => IR}/InstructionsTest.cpp | 36 +- unittests/{VMCore => 
IR}/MDBuilderTest.cpp | 7 +- unittests/{VMCore => IR}/Makefile | 4 +- unittests/{VMCore => IR}/MetadataTest.cpp | 18 +- unittests/{VMCore => IR}/PassManagerTest.cpp | 58 +- unittests/{VMCore => IR}/TypeBuilderTest.cpp | 5 +- unittests/{VMCore => IR}/TypesTest.cpp | 6 +- unittests/{VMCore => IR}/ValueMapTest.cpp | 7 +- unittests/{VMCore => IR}/VerifierTest.cpp | 20 +- unittests/IR/WaymarkTest.cpp | 56 + unittests/Makefile | 2 +- unittests/Option/CMakeLists.txt | 15 + unittests/Option/OptionParsingTest.cpp | 106 + unittests/Option/Opts.td | 13 + unittests/Support/AlignOfTest.cpp | 26 +- unittests/Support/AllocatorTest.cpp | 1 - unittests/Support/ArrayRecyclerTest.cpp | 109 + unittests/Support/BlockFrequencyTest.cpp | 3 +- unittests/Support/CMakeLists.txt | 4 + unittests/Support/Casting.cpp | 1 - unittests/Support/CommandLineTest.cpp | 4 +- unittests/Support/ConstantRangeTest.cpp | 3 +- unittests/Support/EndianTest.cpp | 24 +- unittests/Support/ErrorOrTest.cpp | 104 + unittests/Support/FileOutputBufferTest.cpp | 25 +- unittests/Support/IntegersSubsetTest.cpp | 4 +- unittests/Support/ManagedStatic.cpp | 25 +- unittests/Support/MemoryBufferTest.cpp | 1 - unittests/Support/MemoryTest.cpp | 713 +- unittests/Support/Path.cpp | 18 +- unittests/Support/ProcessTest.cpp | 42 + unittests/Support/RegexTest.cpp | 25 +- unittests/Support/ValueHandleTest.cpp | 9 +- unittests/Support/YAMLIOTest.cpp | 1299 + unittests/Support/YAMLParserTest.cpp | 34 + .../Support/formatted_raw_ostream_test.cpp | 4 +- unittests/Transforms/Utils/Cloning.cpp | 12 +- .../Transforms/Utils/IntegerDivision.cpp | 12 +- unittests/Transforms/Utils/Local.cpp | 9 +- unittests/VMCore/ConstantsTest.cpp | 122 - utils/FileCheck/FileCheck.cpp | 286 +- utils/FileUpdate/FileUpdate.cpp | 4 +- utils/GenLibDeps.pl | 2 +- utils/KillTheDoctor/KillTheDoctor.cpp | 7 +- utils/PerfectShuffle/PerfectShuffle.cpp | 6 +- utils/TableGen/AsmMatcherEmitter.cpp | 36 +- utils/TableGen/AsmWriterEmitter.cpp | 19 +- 
utils/TableGen/CMakeLists.txt | 3 +- utils/TableGen/CTagsEmitter.cpp | 99 + utils/TableGen/CodeEmitterGen.cpp | 2 +- utils/TableGen/CodeGenDAGPatterns.cpp | 219 +- utils/TableGen/CodeGenDAGPatterns.h | 14 +- utils/TableGen/CodeGenInstruction.cpp | 6 +- utils/TableGen/CodeGenInstruction.h | 4 +- utils/TableGen/CodeGenIntrinsics.h | 2 +- utils/TableGen/CodeGenMapTable.cpp | 9 +- utils/TableGen/CodeGenRegisters.cpp | 49 +- utils/TableGen/CodeGenRegisters.h | 27 +- utils/TableGen/CodeGenSchedule.cpp | 302 +- utils/TableGen/CodeGenSchedule.h | 86 +- utils/TableGen/CodeGenTarget.cpp | 14 +- utils/TableGen/CodeGenTarget.h | 10 +- utils/TableGen/DAGISelMatcher.cpp | 4 +- utils/TableGen/DAGISelMatcher.h | 4 +- utils/TableGen/DAGISelMatcherEmitter.cpp | 4 +- utils/TableGen/DAGISelMatcherGen.cpp | 57 +- utils/TableGen/DFAPacketizerEmitter.cpp | 11 +- utils/TableGen/DisassemblerEmitter.cpp | 5 +- utils/TableGen/EDEmitter.cpp | 1011 - utils/TableGen/FixedLenDecoderEmitter.cpp | 9 +- utils/TableGen/InstrInfoEmitter.cpp | 6 +- utils/TableGen/IntrinsicEmitter.cpp | 71 +- utils/TableGen/OptParserEmitter.cpp | 266 + utils/TableGen/PseudoLoweringEmitter.cpp | 1 + utils/TableGen/RegisterInfoEmitter.cpp | 110 +- utils/TableGen/SequenceToOffsetTable.h | 4 +- utils/TableGen/SetTheory.cpp | 2 +- utils/TableGen/SetTheory.h | 2 +- utils/TableGen/StringToOffsetTable.h | 2 +- utils/TableGen/SubtargetEmitter.cpp | 206 +- utils/TableGen/TableGen.cpp | 19 +- utils/TableGen/TableGenBackends.h | 3 +- utils/TableGen/X86DisassemblerShared.h | 2 +- utils/TableGen/X86DisassemblerTables.cpp | 5 +- utils/TableGen/X86DisassemblerTables.h | 2 - utils/TableGen/X86RecognizableInstr.cpp | 33 +- utils/TableGen/X86RecognizableInstr.h | 8 +- utils/TableGen/tdtags | 453 + utils/UpdateCMakeLists.pl | 2 +- utils/buildit/build_llvm | 194 +- utils/clang-parse-diagnostics-file | 36 +- utils/emacs/llvm-mode.el | 9 +- utils/git/find-rev | 8 +- utils/kate/llvm.xml | 1 + utils/lit/MANIFEST.in | 7 + utils/lit/TODO | 17 + 
utils/lit/lit/ExampleTests/Clang/lit.cfg | 2 +- .../LLVM.InTree/test/Bar/data.txt | 1 + .../ExampleTests/LLVM.InTree/test/Bar/dg.exp | 6 - .../LLVM.InTree/test/Bar/pct-S.ll | 1 + .../lit/ExampleTests/LLVM.InTree/test/lit.cfg | 75 +- .../LLVM.InTree/test/lit.site.cfg | 3 - .../ExampleTests/LLVM.InTree/test/site.exp | 10 - .../LLVM.OutOfTree/obj/test/lit.site.cfg | 3 - .../LLVM.OutOfTree/obj/test/site.exp | 10 - .../LLVM.OutOfTree/src/test/Foo/dg.exp | 6 - .../LLVM.OutOfTree/src/test/lit.cfg | 75 +- .../lit/ExampleTests/ManyTests/lit.local.cfg | 23 + .../lit/ExampleTests/TclTest/lit.local.cfg | 5 - .../lit/ExampleTests/TclTest/stderr-pipe.ll | 1 - .../lit/ExampleTests/TclTest/tcl-redir-1.ll | 7 - utils/lit/lit/ExampleTests/lit.cfg | 4 +- utils/lit/lit/LitConfig.py | 14 +- utils/lit/lit/LitFormats.py | 3 - utils/lit/lit/ShUtil.py | 22 +- utils/lit/lit/TclUtil.py | 322 - utils/lit/lit/Test.py | 4 + utils/lit/lit/TestFormats.py | 58 +- utils/lit/lit/TestRunner.py | 180 +- utils/lit/lit/__init__.py | 2 +- utils/lit/lit/discovery.py | 234 + utils/lit/lit/main.py | 267 +- utils/lit/tests/.coveragerc | 11 + utils/lit/tests/Inputs/discovery/lit.cfg | 5 + .../Inputs/discovery/subdir/lit.local.cfg | 1 + .../Inputs/discovery/subdir/test-three.py | 1 + .../tests/Inputs/discovery/subsuite/lit.cfg | 5 + .../Inputs/discovery/subsuite/test-one.txt | 1 + .../Inputs/discovery/subsuite/test-two.txt | 1 + utils/lit/tests/Inputs/discovery/test-one.txt | 1 + utils/lit/tests/Inputs/discovery/test-two.txt | 1 + .../shtest-format/external_shell/fail.txt | 3 + .../external_shell/lit.local.cfg | 1 + .../shtest-format/external_shell/pass.txt | 1 + utils/lit/tests/Inputs/shtest-format/fail.txt | 1 + utils/lit/tests/Inputs/shtest-format/lit.cfg | 7 + .../Inputs/shtest-format/no-test-line.txt | 1 + utils/lit/tests/Inputs/shtest-format/pass.txt | 1 + .../Inputs/shtest-format/requires-missing.txt | 2 + .../Inputs/shtest-format/requires-present.txt | 2 + .../unsupported_dir/lit.local.cfg | 1 + 
.../unsupported_dir/some-test.txt | 1 + .../Inputs/shtest-format/xfail-feature.txt | 2 + .../Inputs/shtest-format/xfail-target.txt | 2 + .../lit/tests/Inputs/shtest-format/xfail.txt | 2 + .../lit/tests/Inputs/shtest-format/xpass.txt | 2 + .../lit/tests/Inputs/shtest-shell/error-0.txt | 3 + .../lit/tests/Inputs/shtest-shell/error-1.txt | 3 + .../lit/tests/Inputs/shtest-shell/error-2.txt | 3 + utils/lit/tests/Inputs/shtest-shell/lit.cfg | 5 + .../tests/Inputs/shtest-shell/redirects.txt | 41 + .../Inputs/shtest-shell/sequencing-0.txt | 28 + .../Inputs/shtest-shell/sequencing-1.txt | 2 + .../Inputs/shtest-shell/write-to-stderr.sh | 3 + .../write-to-stdout-and-stderr.sh | 4 + .../lit/tests/Inputs/unittest-adaptor/lit.cfg | 5 + .../Inputs/unittest-adaptor/test-one.txt | 1 + .../Inputs/unittest-adaptor/test-two.txt | 1 + utils/lit/tests/discovery.py | 25 + utils/lit/tests/lit.cfg | 36 + utils/lit/tests/shell-parsing.py | 3 + utils/lit/tests/shtest-format.py | 43 + utils/lit/tests/shtest-shell.py | 33 + utils/lit/tests/unittest-adaptor.py | 18 + utils/lit/tests/usage.py | 6 + utils/lit/utils/README.txt | 2 + utils/lit/utils/check-coverage | 50 + utils/lit/utils/check-sdist | 44 + utils/llvm-build/llvmbuild/main.py | 8 +- utils/llvm-compilers-check | 104 +- utils/llvm-lit/llvm-lit.in | 3 +- utils/llvm.grm | 4 +- utils/llvm.natvis | 181 + utils/sort_includes.py | 87 + utils/testgen/mc-bundling-x86-gen.py | 103 + utils/textmate/README | 8 + .../Syntaxes/TableGen.tmLanguage | 132 + utils/textmate/TableGen.tmbundle/info.plist | 12 + utils/unittest/UnitTestMain/TestMain.cpp | 2 + utils/unittest/googletest/Makefile | 2 + utils/unittest/googletest/README.LLVM | 3 +- utils/unittest/googletest/gtest-all.cc | 48 + utils/unittest/googletest/gtest-filepath.cc | 2 - utils/unittest/googletest/gtest-printers.cc | 4 +- .../include/gtest/internal/gtest-internal.h | 9 + utils/valgrind/x86_64-pc-linux-gnu.supp | 6 + utils/vim/llvm.vim | 72 +- utils/vim/vimrc | 9 +- utils/wciia.py | 125 + 
utils/yaml-bench/YAMLBench.cpp | 4 +- utils/yaml2obj/yaml2obj.cpp | 937 +- 4058 files changed, 367586 insertions(+), 151868 deletions(-) create mode 100644 .arcconfig create mode 100644 cmake/modules/GetSVN.cmake create mode 100644 docs/CommandGuide/llvm-symbolizer.rst create mode 100644 docs/Dummy.html delete mode 100644 docs/GCCFEBuildInstrs.html delete mode 100644 docs/GarbageCollection.html create mode 100644 docs/GarbageCollection.rst delete mode 100644 docs/HowToReleaseLLVM.html create mode 100644 docs/HowToReleaseLLVM.rst create mode 100644 docs/HowToUseAttributes.rst delete mode 100644 docs/LLVMBuild.html create mode 100644 docs/LLVMBuild.rst delete mode 100644 docs/LangRef.html create mode 100644 docs/LangRef.rst create mode 100644 docs/NVPTXUsage.rst delete mode 100644 docs/Passes.html create mode 100644 docs/Passes.rst delete mode 100644 docs/ProgrammersManual.html create mode 100644 docs/ProgrammersManual.rst delete mode 100644 docs/ReleaseNotes.html create mode 100644 docs/ReleaseNotes.rst delete mode 100644 docs/SourceLevelDebugging.html create mode 100644 docs/SourceLevelDebugging.rst delete mode 100644 docs/SystemLibrary.html create mode 100644 docs/SystemLibrary.rst create mode 100644 docs/TableGen/LangRef.rst delete mode 100644 docs/TestSuiteMakefileGuide.html create mode 100644 docs/TestSuiteMakefileGuide.rst delete mode 100644 docs/TestingGuide.html create mode 100644 docs/TestingGuide.rst create mode 100644 docs/Vectorizers.rst delete mode 100644 docs/WritingAnLLVMBackend.html create mode 100644 docs/WritingAnLLVMBackend.rst delete mode 100644 docs/WritingAnLLVMPass.html create mode 100644 docs/WritingAnLLVMPass.rst create mode 100644 docs/YamlIO.rst delete mode 100644 docs/design_and_overview.rst delete mode 100644 docs/development_process.rst create mode 100644 docs/gcc-loops.png create mode 100644 docs/linpack-pc.png delete mode 100644 docs/mailing_lists.rst delete mode 100644 docs/programming.rst delete mode 100644 docs/subsystems.rst 
delete mode 100644 docs/tutorial/LangImpl1.html create mode 100644 docs/tutorial/LangImpl1.rst delete mode 100644 docs/tutorial/LangImpl2.html create mode 100644 docs/tutorial/LangImpl2.rst delete mode 100644 docs/tutorial/LangImpl3.html create mode 100644 docs/tutorial/LangImpl3.rst delete mode 100644 docs/tutorial/LangImpl4.html create mode 100644 docs/tutorial/LangImpl4.rst delete mode 100644 docs/tutorial/LangImpl5.html create mode 100644 docs/tutorial/LangImpl5.rst delete mode 100644 docs/tutorial/LangImpl6.html create mode 100644 docs/tutorial/LangImpl6.rst delete mode 100644 docs/tutorial/LangImpl7.html create mode 100644 docs/tutorial/LangImpl7.rst delete mode 100644 docs/tutorial/LangImpl8.html create mode 100644 docs/tutorial/LangImpl8.rst delete mode 100644 docs/tutorial/OCamlLangImpl1.html create mode 100644 docs/tutorial/OCamlLangImpl1.rst delete mode 100644 docs/tutorial/OCamlLangImpl2.html create mode 100644 docs/tutorial/OCamlLangImpl2.rst delete mode 100644 docs/tutorial/OCamlLangImpl3.html create mode 100644 docs/tutorial/OCamlLangImpl3.rst delete mode 100644 docs/tutorial/OCamlLangImpl4.html create mode 100644 docs/tutorial/OCamlLangImpl4.rst delete mode 100644 docs/tutorial/OCamlLangImpl5.html create mode 100644 docs/tutorial/OCamlLangImpl5.rst delete mode 100644 docs/tutorial/OCamlLangImpl6.html create mode 100644 docs/tutorial/OCamlLangImpl6.rst delete mode 100644 docs/tutorial/OCamlLangImpl7.html create mode 100644 docs/tutorial/OCamlLangImpl7.rst delete mode 100644 docs/tutorial/OCamlLangImpl8.html create mode 100644 docs/tutorial/OCamlLangImpl8.rst delete mode 100644 docs/tutorial/index.html create mode 100644 docs/tutorial/index.rst delete mode 100644 docs/userguides.rst delete mode 100644 include/llvm-c/EnhancedDisassembly.h create mode 100644 include/llvm/ADT/None.h create mode 100644 include/llvm/ADT/SparseMultiSet.h delete mode 100644 include/llvm/AddressingMode.h rename include/llvm/{ => Analysis}/CallGraphSCCPass.h (96%) create mode 
100644 include/llvm/Analysis/CallPrinter.h create mode 100644 include/llvm/Analysis/PtrUseVisitor.h create mode 100644 include/llvm/Analysis/TargetTransformInfo.h delete mode 100644 include/llvm/Argument.h delete mode 100644 include/llvm/Attributes.h create mode 100644 include/llvm/CodeGen/DAGCombine.h rename {lib => include/llvm}/CodeGen/LiveIntervalUnion.h (95%) rename {lib => include/llvm}/CodeGen/LiveRegMatrix.h (99%) delete mode 100644 include/llvm/CodeGen/MachineLoopRanges.h rename {lib => include/llvm}/CodeGen/MachineTraceMetrics.h (85%) delete mode 100644 include/llvm/CodeGen/ScheduleDAGILP.h create mode 100644 include/llvm/CodeGen/ScheduleDFS.h rename {lib => include/llvm}/CodeGen/VirtRegMap.h (95%) delete mode 100644 include/llvm/DefaultPasses.h create mode 100644 include/llvm/ExecutionEngine/SectionMemoryManager.h create mode 100644 include/llvm/IR/Argument.h create mode 100644 include/llvm/IR/Attributes.h rename include/llvm/{ => IR}/BasicBlock.h (53%) create mode 100644 include/llvm/IR/CMakeLists.txt rename include/llvm/{ => IR}/CallingConv.h (96%) rename include/llvm/{ => IR}/Constant.h (92%) rename include/llvm/{ => IR}/Constants.h (97%) rename include/llvm/{ => IR}/DataLayout.h (85%) rename include/llvm/{ => IR}/DerivedTypes.h (95%) rename include/llvm/{ => IR}/Function.h (80%) rename include/llvm/{ => IR}/GlobalAlias.h (95%) rename include/llvm/{ => IR}/GlobalValue.h (99%) rename include/llvm/{ => IR}/GlobalVariable.h (80%) rename include/llvm/{ => IR}/IRBuilder.h (82%) rename include/llvm/{ => IR}/InlineAsm.h (99%) rename include/llvm/{ => IR}/InstrTypes.h (99%) rename include/llvm/{ => IR}/Instruction.def (100%) rename include/llvm/{ => IR}/Instruction.h (84%) rename include/llvm/{ => IR}/Instructions.h (96%) rename include/llvm/{ => IR}/IntrinsicInst.h (97%) rename include/llvm/{ => IR}/Intrinsics.h (94%) rename include/llvm/{ => IR}/Intrinsics.td (95%) rename include/llvm/{ => IR}/IntrinsicsARM.td (100%) rename include/llvm/{ => 
IR}/IntrinsicsHexagon.td (100%) rename include/llvm/{ => IR}/IntrinsicsMips.td (100%) rename include/llvm/{ => IR}/IntrinsicsNVVM.td (97%) rename include/llvm/{ => IR}/IntrinsicsPowerPC.td (99%) create mode 100644 include/llvm/IR/IntrinsicsR600.td rename include/llvm/{ => IR}/IntrinsicsX86.td (99%) rename include/llvm/{ => IR}/IntrinsicsXCore.td (100%) rename include/llvm/{ => IR}/LLVMContext.h (96%) create mode 100644 include/llvm/IR/MDBuilder.h rename include/llvm/{ => IR}/Metadata.h (97%) rename include/llvm/{ => IR}/Module.h (89%) rename include/llvm/{ => IR}/OperandTraits.h (98%) rename include/llvm/{ => IR}/Operator.h (62%) rename include/llvm/{ => IR}/SymbolTableListTraits.h (97%) rename include/llvm/{ => IR}/Type.h (96%) rename include/llvm/{ => IR}/TypeBuilder.h (99%) rename include/llvm/{ => IR}/TypeFinder.h (95%) rename include/llvm/{ => IR}/Use.h (95%) rename include/llvm/{ => IR}/User.h (92%) rename include/llvm/{ => IR}/Value.h (99%) rename include/llvm/{ => IR}/ValueSymbolTable.h (97%) create mode 100644 include/llvm/IRReader/IRReader.h rename include/llvm/{Support => }/InstVisitor.h (97%) delete mode 100644 include/llvm/IntrinsicsCellSPU.td rename include/llvm/{LinkAllVMCore.h => LinkAllIR.h} (85%) delete mode 100644 include/llvm/MC/EDInstInfo.h rename {lib => include/llvm}/MC/MCELF.h (90%) create mode 100644 include/llvm/MC/MCELFStreamer.h create mode 100644 include/llvm/MC/MCInstBuilder.h delete mode 100644 include/llvm/MC/MCTargetAsmLexer.h delete mode 100644 include/llvm/MDBuilder.h create mode 100644 include/llvm/Option/Arg.h create mode 100644 include/llvm/Option/ArgList.h create mode 100644 include/llvm/Option/OptParser.td create mode 100644 include/llvm/Option/OptSpecifier.h create mode 100644 include/llvm/Option/OptTable.h create mode 100644 include/llvm/Option/Option.h create mode 100644 include/llvm/Support/ArrayRecycler.h create mode 100644 include/llvm/Support/ConvertUTF.h create mode 100644 include/llvm/Support/ErrorOr.h delete mode 
100644 include/llvm/Support/IRReader.h create mode 100644 include/llvm/Support/Watchdog.h create mode 100644 include/llvm/Support/YAMLTraits.h create mode 100644 include/llvm/Target/CostTable.h delete mode 100644 include/llvm/Target/TargetTransformImpl.h delete mode 100644 include/llvm/TargetTransformInfo.h create mode 100644 include/llvm/Transforms/ObjCARC.h delete mode 100644 include/llvm/Transforms/Utils/AddrModeMatcher.h rename {lib/Transforms/Instrumentation => include/llvm/Transforms/Utils}/BlackList.h (89%) delete mode 100644 lib/Analysis/DbgInfoPrinter.cpp create mode 100644 lib/Analysis/IPA/CallPrinter.cpp rename lib/Analysis/{ => IPA}/InlineCost.cpp (75%) create mode 100644 lib/Analysis/PtrUseVisitor.cpp create mode 100644 lib/Analysis/TargetTransformInfo.cpp create mode 100644 lib/Bitcode/Reader/BitstreamReader.cpp create mode 100644 lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp create mode 100644 lib/CodeGen/BasicTargetTransformInfo.cpp delete mode 100644 lib/CodeGen/CodePlacementOpt.cpp create mode 100644 lib/CodeGen/ErlangGC.cpp delete mode 100644 lib/CodeGen/MachineLoopRanges.cpp rename lib/CodeGen/{TargetInstrInfoImpl.cpp => TargetInstrInfo.cpp} (80%) create mode 100644 lib/CodeGen/TargetLoweringBase.cpp rename lib/{Target => CodeGen}/TargetRegisterInfo.cpp (84%) create mode 100644 lib/DebugInfo/DWARFDebugFrame.cpp create mode 100644 lib/DebugInfo/DWARFDebugFrame.h create mode 100644 lib/DebugInfo/DWARFRelocMap.h create mode 100644 lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp rename lib/{VMCore => IR}/AsmWriter.cpp (90%) create mode 100644 lib/IR/AttributeImpl.h create mode 100644 lib/IR/Attributes.cpp rename lib/{VMCore => IR}/AutoUpgrade.cpp (98%) rename lib/{VMCore => IR}/BasicBlock.cpp (98%) rename lib/{VMCore => IR}/CMakeLists.txt (97%) rename lib/{VMCore => IR}/ConstantFold.cpp (97%) rename lib/{VMCore => IR}/ConstantFold.h (100%) rename lib/{VMCore => IR}/Constants.cpp (94%) rename lib/{VMCore => IR}/ConstantsContext.h (99%) rename 
lib/{VMCore => IR}/Core.cpp (96%) rename lib/{VMCore => IR}/DIBuilder.cpp (79%) rename lib/{VMCore => IR}/DataLayout.cpp (78%) rename lib/{VMCore => IR}/DebugInfo.cpp (79%) rename lib/{VMCore => IR}/DebugLoc.cpp (100%) rename lib/{VMCore => IR}/Dominators.cpp (99%) rename lib/{VMCore => IR}/Function.cpp (83%) rename lib/{VMCore => IR}/GCOV.cpp (100%) rename lib/{VMCore => IR}/GVMaterializer.cpp (100%) rename lib/{VMCore => IR}/Globals.cpp (91%) rename lib/{VMCore => IR}/IRBuilder.cpp (97%) rename lib/{VMCore => IR}/InlineAsm.cpp (97%) rename lib/{VMCore => IR}/Instruction.cpp (79%) rename lib/{VMCore => IR}/Instructions.cpp (97%) rename lib/{VMCore => IR}/IntrinsicInst.cpp (94%) rename lib/{VMCore => IR}/LLVMBuild.txt (90%) rename lib/{VMCore => IR}/LLVMContext.cpp (90%) rename lib/{VMCore => IR}/LLVMContextImpl.cpp (89%) rename lib/{VMCore => IR}/LLVMContextImpl.h (95%) rename lib/{VMCore => IR}/LeakDetector.cpp (98%) rename lib/{VMCore => IR}/LeaksContext.h (98%) rename lib/{VMCore => IR}/Makefile (62%) rename lib/{VMCore => IR}/Metadata.cpp (99%) rename lib/{VMCore => IR}/Module.cpp (94%) rename lib/{VMCore => IR}/Pass.cpp (94%) rename lib/{VMCore => IR}/PassManager.cpp (96%) rename lib/{VMCore => IR}/PassRegistry.cpp (99%) rename lib/{VMCore => IR}/PrintModulePass.cpp (70%) rename lib/{VMCore => IR}/SymbolTableListTraitsImpl.h (98%) rename lib/{VMCore => IR}/Type.cpp (96%) rename lib/{VMCore => IR}/TypeFinder.cpp (94%) rename lib/{VMCore => IR}/Use.cpp (98%) rename lib/{VMCore => IR}/User.cpp (96%) rename lib/{VMCore => IR}/Value.cpp (97%) rename lib/{VMCore => IR}/ValueSymbolTable.cpp (95%) rename lib/{VMCore => IR}/ValueTypes.cpp (87%) rename lib/{VMCore => IR}/Verifier.cpp (85%) create mode 100644 lib/IRReader/CMakeLists.txt create mode 100644 lib/IRReader/IRReader.cpp rename lib/{Target/CellSPU/MCTargetDesc => IRReader}/LLVMBuild.txt (77%) create mode 100644 lib/IRReader/Makefile delete mode 100644 lib/Linker/LinkArchives.cpp delete mode 100644 
lib/Linker/LinkItems.cpp delete mode 100644 lib/MC/MCDisassembler/EDDisassembler.cpp delete mode 100644 lib/MC/MCDisassembler/EDDisassembler.h delete mode 100644 lib/MC/MCDisassembler/EDInfo.h delete mode 100644 lib/MC/MCDisassembler/EDInst.cpp delete mode 100644 lib/MC/MCDisassembler/EDInst.h delete mode 100644 lib/MC/MCDisassembler/EDMain.cpp delete mode 100644 lib/MC/MCDisassembler/EDOperand.cpp delete mode 100644 lib/MC/MCDisassembler/EDOperand.h delete mode 100644 lib/MC/MCDisassembler/EDToken.cpp delete mode 100644 lib/MC/MCDisassembler/EDToken.h create mode 100644 lib/Option/Arg.cpp create mode 100644 lib/Option/ArgList.cpp create mode 100644 lib/Option/CMakeLists.txt create mode 100644 lib/Option/LLVMBuild.txt create mode 100644 lib/Option/Makefile create mode 100644 lib/Option/OptTable.cpp create mode 100644 lib/Option/Option.cpp create mode 100644 lib/Support/ConvertUTF.c create mode 100644 lib/Support/ConvertUTFWrapper.cpp create mode 100644 lib/Support/Unix/Watchdog.inc create mode 100644 lib/Support/Watchdog.cpp create mode 100644 lib/Support/Windows/Watchdog.inc create mode 100644 lib/Support/YAMLTraits.cpp create mode 100644 lib/Target/AArch64/AArch64.h create mode 100644 lib/Target/AArch64/AArch64.td create mode 100644 lib/Target/AArch64/AArch64AsmPrinter.cpp create mode 100644 lib/Target/AArch64/AArch64AsmPrinter.h create mode 100644 lib/Target/AArch64/AArch64BranchFixupPass.cpp create mode 100644 lib/Target/AArch64/AArch64CallingConv.td create mode 100644 lib/Target/AArch64/AArch64FrameLowering.cpp create mode 100644 lib/Target/AArch64/AArch64FrameLowering.h create mode 100644 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp create mode 100644 lib/Target/AArch64/AArch64ISelLowering.cpp create mode 100644 lib/Target/AArch64/AArch64ISelLowering.h create mode 100644 lib/Target/AArch64/AArch64InstrFormats.td create mode 100644 lib/Target/AArch64/AArch64InstrInfo.cpp create mode 100644 lib/Target/AArch64/AArch64InstrInfo.h create mode 100644 
lib/Target/AArch64/AArch64InstrInfo.td create mode 100644 lib/Target/AArch64/AArch64MCInstLower.cpp rename lib/Target/{CellSPU/SPUSelectionDAGInfo.cpp => AArch64/AArch64MachineFunctionInfo.cpp} (50%) create mode 100644 lib/Target/AArch64/AArch64MachineFunctionInfo.h create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.cpp create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.h create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.td rename lib/Target/{CellSPU/SPUMachineFunction.cpp => AArch64/AArch64Schedule.td} (60%) create mode 100644 lib/Target/AArch64/AArch64SelectionDAGInfo.cpp rename lib/Target/{CellSPU/SPUSelectionDAGInfo.h => AArch64/AArch64SelectionDAGInfo.h} (51%) create mode 100644 lib/Target/AArch64/AArch64Subtarget.cpp create mode 100644 lib/Target/AArch64/AArch64Subtarget.h create mode 100644 lib/Target/AArch64/AArch64TargetMachine.cpp create mode 100644 lib/Target/AArch64/AArch64TargetMachine.h create mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.cpp create mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.h create mode 100644 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp create mode 100644 lib/Target/AArch64/AsmParser/CMakeLists.txt create mode 100644 lib/Target/AArch64/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/AArch64/AsmParser/Makefile create mode 100644 lib/Target/AArch64/CMakeLists.txt create mode 100644 lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp create mode 100644 lib/Target/AArch64/Disassembler/CMakeLists.txt create mode 100644 lib/Target/AArch64/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/AArch64/Disassembler/Makefile create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h create mode 100644 lib/Target/AArch64/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/AArch64/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/AArch64/InstPrinter/Makefile create mode 100644 
lib/Target/AArch64/LLVMBuild.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp rename lib/Target/{CellSPU/MCTargetDesc/SPUMCAsmInfo.h => AArch64/MCTargetDesc/AArch64MCAsmInfo.h} (51%) create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h create mode 100644 lib/Target/AArch64/MCTargetDesc/CMakeLists.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt rename lib/Target/{CellSPU => AArch64}/MCTargetDesc/Makefile (82%) create mode 100644 lib/Target/AArch64/Makefile create mode 100644 lib/Target/AArch64/README.txt create mode 100644 lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp create mode 100644 lib/Target/AArch64/TargetInfo/CMakeLists.txt rename lib/Target/{CellSPU => AArch64}/TargetInfo/LLVMBuild.txt (82%) rename lib/Target/{CellSPU => AArch64}/TargetInfo/Makefile (82%) create mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp create mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.h create mode 100644 lib/Target/AArch64/Utils/CMakeLists.txt create mode 100644 lib/Target/AArch64/Utils/LLVMBuild.txt create mode 100644 lib/Target/AArch64/Utils/Makefile create mode 100644 lib/Target/ARM/A15SDOptimizer.cpp create mode 100644 lib/Target/ARM/ARMTargetTransformInfo.cpp delete mode 100644 lib/Target/ARM/AsmParser/ARMAsmLexer.cpp create mode 100755 
lib/Target/ARM/LICENSE.TXT create mode 100644 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp create mode 100644 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h create mode 100644 lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h delete mode 100644 lib/Target/CellSPU/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/CellSDKIntrinsics.td delete mode 100644 lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h delete mode 100644 lib/Target/CellSPU/Makefile delete mode 100644 lib/Target/CellSPU/README.txt delete mode 100644 lib/Target/CellSPU/SPU.h delete mode 100644 lib/Target/CellSPU/SPU.td delete mode 100644 lib/Target/CellSPU/SPU128InstrInfo.td delete mode 100644 lib/Target/CellSPU/SPU64InstrInfo.td delete mode 100644 lib/Target/CellSPU/SPUAsmPrinter.cpp delete mode 100644 lib/Target/CellSPU/SPUCallingConv.td delete mode 100644 lib/Target/CellSPU/SPUFrameLowering.cpp delete mode 100644 lib/Target/CellSPU/SPUFrameLowering.h delete mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.cpp delete mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.h delete mode 100644 lib/Target/CellSPU/SPUISelDAGToDAG.cpp delete mode 100644 lib/Target/CellSPU/SPUISelLowering.cpp delete mode 100644 lib/Target/CellSPU/SPUISelLowering.h delete mode 100644 lib/Target/CellSPU/SPUInstrBuilder.h delete mode 100644 lib/Target/CellSPU/SPUInstrFormats.td delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.cpp delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.h delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.td delete mode 100644 lib/Target/CellSPU/SPUMachineFunction.h delete mode 100644 lib/Target/CellSPU/SPUMathInstr.td delete mode 100644 lib/Target/CellSPU/SPUNodes.td delete mode 100644 lib/Target/CellSPU/SPUNopFiller.cpp delete mode 100644 lib/Target/CellSPU/SPUOperands.td delete mode 100644 
lib/Target/CellSPU/SPURegisterInfo.cpp delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.h delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.td delete mode 100644 lib/Target/CellSPU/SPURegisterNames.h delete mode 100644 lib/Target/CellSPU/SPUSchedule.td delete mode 100644 lib/Target/CellSPU/SPUSubtarget.cpp delete mode 100644 lib/Target/CellSPU/SPUSubtarget.h delete mode 100644 lib/Target/CellSPU/SPUTargetMachine.cpp delete mode 100644 lib/Target/CellSPU/SPUTargetMachine.h delete mode 100644 lib/Target/CellSPU/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp create mode 100644 lib/Target/Hexagon/HexagonFixupHwLoops.cpp delete mode 100644 lib/Target/Hexagon/HexagonImmediates.td delete mode 100644 lib/Target/Hexagon/HexagonMCInst.h create mode 100644 lib/Target/Hexagon/HexagonOperands.td create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h delete mode 100644 lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h create mode 100644 lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsReginfo.h create mode 100644 lib/Target/Mips/Mips16ISelDAGToDAG.cpp create mode 100644 lib/Target/Mips/Mips16ISelDAGToDAG.h create mode 100644 lib/Target/Mips/Mips16ISelLowering.cpp create mode 100644 lib/Target/Mips/Mips16ISelLowering.h create mode 100644 lib/Target/Mips/MipsConstantIslandPass.cpp create mode 100644 lib/Target/Mips/MipsISelDAGToDAG.h create mode 100644 lib/Target/Mips/MipsSEISelDAGToDAG.cpp create mode 100644 lib/Target/Mips/MipsSEISelDAGToDAG.h create mode 100644 lib/Target/Mips/MipsSEISelLowering.cpp create mode 100644 lib/Target/Mips/MipsSEISelLowering.h create mode 100644 lib/Target/NVPTX/NVVMReflect.cpp delete mode 100644 lib/Target/NVPTX/VectorElementize.cpp 
delete mode 100644 lib/Target/NVPTX/gen-register-defs.py delete mode 100644 lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h create mode 100644 lib/Target/PowerPC/PPCTargetTransformInfo.cpp create mode 100644 lib/Target/R600/AMDGPU.h create mode 100644 lib/Target/R600/AMDGPU.td create mode 100644 lib/Target/R600/AMDGPUAsmPrinter.cpp create mode 100644 lib/Target/R600/AMDGPUAsmPrinter.h create mode 100644 lib/Target/R600/AMDGPUCallingConv.td create mode 100644 lib/Target/R600/AMDGPUConvertToISA.cpp create mode 100644 lib/Target/R600/AMDGPUFrameLowering.cpp create mode 100644 lib/Target/R600/AMDGPUFrameLowering.h create mode 100644 lib/Target/R600/AMDGPUISelLowering.cpp create mode 100644 lib/Target/R600/AMDGPUISelLowering.h create mode 100644 lib/Target/R600/AMDGPUIndirectAddressing.cpp create mode 100644 lib/Target/R600/AMDGPUInstrInfo.cpp create mode 100644 lib/Target/R600/AMDGPUInstrInfo.h create mode 100644 lib/Target/R600/AMDGPUInstrInfo.td create mode 100644 lib/Target/R600/AMDGPUInstructions.td create mode 100644 lib/Target/R600/AMDGPUIntrinsics.td create mode 100644 lib/Target/R600/AMDGPUMCInstLower.cpp create mode 100644 lib/Target/R600/AMDGPUMCInstLower.h create mode 100644 lib/Target/R600/AMDGPUMachineFunction.cpp create mode 100644 lib/Target/R600/AMDGPUMachineFunction.h create mode 100644 lib/Target/R600/AMDGPURegisterInfo.cpp create mode 100644 lib/Target/R600/AMDGPURegisterInfo.h create mode 100644 lib/Target/R600/AMDGPURegisterInfo.td create mode 100644 lib/Target/R600/AMDGPUStructurizeCFG.cpp create mode 100644 lib/Target/R600/AMDGPUSubtarget.cpp create mode 100644 lib/Target/R600/AMDGPUSubtarget.h create mode 100644 lib/Target/R600/AMDGPUTargetMachine.cpp create mode 100644 lib/Target/R600/AMDGPUTargetMachine.h create mode 100644 lib/Target/R600/AMDIL.h create mode 100644 lib/Target/R600/AMDIL7XXDevice.cpp create mode 100644 lib/Target/R600/AMDIL7XXDevice.h create mode 100644 lib/Target/R600/AMDILBase.td create mode 100644 
lib/Target/R600/AMDILCFGStructurizer.cpp create mode 100644 lib/Target/R600/AMDILDevice.cpp create mode 100644 lib/Target/R600/AMDILDevice.h create mode 100644 lib/Target/R600/AMDILDeviceInfo.cpp create mode 100644 lib/Target/R600/AMDILDeviceInfo.h create mode 100644 lib/Target/R600/AMDILDevices.h create mode 100644 lib/Target/R600/AMDILEvergreenDevice.cpp create mode 100644 lib/Target/R600/AMDILEvergreenDevice.h create mode 100644 lib/Target/R600/AMDILISelDAGToDAG.cpp create mode 100644 lib/Target/R600/AMDILISelLowering.cpp create mode 100644 lib/Target/R600/AMDILInstrInfo.td create mode 100644 lib/Target/R600/AMDILIntrinsicInfo.cpp create mode 100644 lib/Target/R600/AMDILIntrinsicInfo.h create mode 100644 lib/Target/R600/AMDILIntrinsics.td create mode 100644 lib/Target/R600/AMDILNIDevice.cpp create mode 100644 lib/Target/R600/AMDILNIDevice.h create mode 100644 lib/Target/R600/AMDILPeepholeOptimizer.cpp create mode 100644 lib/Target/R600/AMDILRegisterInfo.td create mode 100644 lib/Target/R600/AMDILSIDevice.cpp create mode 100644 lib/Target/R600/AMDILSIDevice.h create mode 100644 lib/Target/R600/CMakeLists.txt create mode 100644 lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp create mode 100644 lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h create mode 100644 lib/Target/R600/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/R600/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/R600/InstPrinter/Makefile rename lib/Target/{CellSPU => R600}/LLVMBuild.txt (68%) create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h create mode 100644 lib/Target/R600/MCTargetDesc/CMakeLists.txt create mode 100644 
lib/Target/R600/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/R600/MCTargetDesc/Makefile create mode 100644 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp create mode 100644 lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp create mode 100644 lib/Target/R600/Makefile create mode 100644 lib/Target/R600/Processors.td create mode 100644 lib/Target/R600/R600ControlFlowFinalizer.cpp create mode 100644 lib/Target/R600/R600Defines.h create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp create mode 100644 lib/Target/R600/R600ExpandSpecialInstrs.cpp create mode 100644 lib/Target/R600/R600ISelLowering.cpp create mode 100644 lib/Target/R600/R600ISelLowering.h create mode 100644 lib/Target/R600/R600InstrInfo.cpp create mode 100644 lib/Target/R600/R600InstrInfo.h create mode 100644 lib/Target/R600/R600Instructions.td create mode 100644 lib/Target/R600/R600Intrinsics.td rename lib/{MC/MCTargetAsmLexer.cpp => Target/R600/R600MachineFunctionInfo.cpp} (53%) create mode 100644 lib/Target/R600/R600MachineFunctionInfo.h create mode 100644 lib/Target/R600/R600MachineScheduler.cpp create mode 100644 lib/Target/R600/R600MachineScheduler.h create mode 100644 lib/Target/R600/R600RegisterInfo.cpp create mode 100644 lib/Target/R600/R600RegisterInfo.h create mode 100644 lib/Target/R600/R600RegisterInfo.td create mode 100644 lib/Target/R600/R600Schedule.td create mode 100644 lib/Target/R600/SIAnnotateControlFlow.cpp create mode 100644 lib/Target/R600/SIISelLowering.cpp create mode 100644 lib/Target/R600/SIISelLowering.h create mode 100644 lib/Target/R600/SIInsertWaits.cpp create mode 100644 lib/Target/R600/SIInstrFormats.td create mode 100644 lib/Target/R600/SIInstrInfo.cpp create mode 100644 lib/Target/R600/SIInstrInfo.h create mode 100644 lib/Target/R600/SIInstrInfo.td create mode 100644 lib/Target/R600/SIInstructions.td create mode 100644 lib/Target/R600/SIIntrinsics.td create mode 100644 lib/Target/R600/SILowerControlFlow.cpp create mode 100644 
lib/Target/R600/SIMachineFunctionInfo.cpp create mode 100644 lib/Target/R600/SIMachineFunctionInfo.h create mode 100644 lib/Target/R600/SIRegisterInfo.cpp create mode 100644 lib/Target/R600/SIRegisterInfo.h create mode 100644 lib/Target/R600/SIRegisterInfo.td create mode 100644 lib/Target/R600/SISchedule.td create mode 100644 lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp create mode 100644 lib/Target/R600/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/R600/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/R600/TargetInfo/Makefile create mode 100644 lib/Target/Sparc/SparcInstr64Bit.td delete mode 100644 lib/Target/TargetInstrInfo.cpp delete mode 100644 lib/Target/TargetTransformImpl.cpp delete mode 100644 lib/Target/X86/AsmParser/X86AsmLexer.cpp create mode 100644 lib/Target/X86/X86PadShortFunction.cpp create mode 100644 lib/Target/X86/X86SchedHaswell.td create mode 100644 lib/Target/X86/X86SchedSandyBridge.td create mode 100644 lib/Target/X86/X86TargetTransformInfo.cpp create mode 100644 lib/Target/XCore/Disassembler/CMakeLists.txt create mode 100644 lib/Target/XCore/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/XCore/Disassembler/Makefile create mode 100644 lib/Target/XCore/Disassembler/XCoreDisassembler.cpp create mode 100644 lib/Target/XCore/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/XCore/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/XCore/InstPrinter/Makefile create mode 100644 lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp create mode 100644 lib/Target/XCore/InstPrinter/XCoreInstPrinter.h create mode 100644 lib/Target/XCore/XCoreMCInstLower.cpp create mode 100644 lib/Target/XCore/XCoreMCInstLower.h create mode 100644 lib/Transforms/Instrumentation/MemorySanitizer.cpp create mode 100644 lib/Transforms/ObjCARC/CMakeLists.txt create mode 100644 lib/Transforms/ObjCARC/DependencyAnalysis.cpp create mode 100644 lib/Transforms/ObjCARC/DependencyAnalysis.h create mode 100644 lib/Transforms/ObjCARC/LLVMBuild.txt 
create mode 100644 lib/Transforms/ObjCARC/Makefile create mode 100644 lib/Transforms/ObjCARC/ObjCARC.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARC.h create mode 100644 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h create mode 100644 lib/Transforms/ObjCARC/ObjCARCContract.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCExpand.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCOpts.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCUtil.cpp create mode 100644 lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp create mode 100644 lib/Transforms/ObjCARC/ProvenanceAnalysis.h delete mode 100644 lib/Transforms/Scalar/ObjCARC.cpp delete mode 100644 lib/Transforms/Utils/AddrModeMatcher.cpp delete mode 100644 lib/VMCore/Attributes.cpp delete mode 100644 lib/VMCore/AttributesImpl.h delete mode 100644 lib/VMCore/TargetTransformInfo.cpp delete mode 100644 runtime/libprofile/GCDAProfiling.c delete mode 100644 runtime/libprofile/libprofile.exports create mode 100644 test/Analysis/BasicAA/invariant_load.ll create mode 100644 test/Analysis/BasicAA/phi-spec-order.ll create mode 100644 test/Analysis/CostModel/ARM/cast.ll create mode 100644 test/Analysis/CostModel/ARM/gep.ll create mode 100644 test/Analysis/CostModel/ARM/insertelement.ll rename test/{CodeGen/CellSPU => Analysis/CostModel/ARM}/lit.local.cfg (81%) create mode 100644 test/Analysis/CostModel/ARM/select.ll create mode 100644 test/Analysis/CostModel/ARM/shuffle.ll create mode 100644 test/Analysis/CostModel/PowerPC/insert_extract.ll create mode 100644 test/Analysis/CostModel/PowerPC/lit.local.cfg create mode 100644 test/Analysis/CostModel/PowerPC/load_store.ll create mode 100644 test/Analysis/CostModel/X86/gep.ll create mode 100644 test/Analysis/CostModel/X86/intrinsic-cost.ll create mode 100644 test/Analysis/CostModel/X86/load_store.ll create mode 100644 
test/Analysis/CostModel/X86/testshiftashr.ll create mode 100644 test/Analysis/CostModel/X86/testshiftlshr.ll create mode 100644 test/Analysis/CostModel/X86/testshiftshl.ll create mode 100644 test/Analysis/ScalarEvolution/scev-invalid.ll create mode 100644 test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll create mode 100644 test/Assembler/ConstantExprNoFold.ll create mode 100644 test/Assembler/externally-initialized.ll create mode 100644 test/Assembler/fast-math-flags.ll create mode 100644 test/Assembler/getelementptr_vec_idx1.ll create mode 100644 test/Assembler/getelementptr_vec_idx2.ll create mode 100644 test/Assembler/getelementptr_vec_idx3.ll create mode 100644 test/Assembler/getelementptr_vec_struct.ll create mode 100644 test/CodeGen/AArch64/adc.ll create mode 100644 test/CodeGen/AArch64/addsub-shifted.ll create mode 100644 test/CodeGen/AArch64/addsub.ll create mode 100644 test/CodeGen/AArch64/addsub_ext.ll create mode 100644 test/CodeGen/AArch64/adrp-relocation.ll create mode 100644 test/CodeGen/AArch64/alloca.ll create mode 100644 test/CodeGen/AArch64/analyze-branch.ll create mode 100644 test/CodeGen/AArch64/atomic-ops-not-barriers.ll create mode 100644 test/CodeGen/AArch64/atomic-ops.ll create mode 100644 test/CodeGen/AArch64/basic-pic.ll create mode 100644 test/CodeGen/AArch64/bitfield-insert-0.ll create mode 100644 test/CodeGen/AArch64/bitfield-insert.ll create mode 100644 test/CodeGen/AArch64/bitfield.ll create mode 100644 test/CodeGen/AArch64/blockaddress.ll create mode 100644 test/CodeGen/AArch64/bool-loads.ll create mode 100644 test/CodeGen/AArch64/breg.ll create mode 100644 test/CodeGen/AArch64/callee-save.ll create mode 100644 test/CodeGen/AArch64/compare-branch.ll create mode 100644 test/CodeGen/AArch64/cond-sel.ll create mode 100644 test/CodeGen/AArch64/directcond.ll create mode 100644 test/CodeGen/AArch64/dp-3source.ll create mode 100644 test/CodeGen/AArch64/dp1.ll create mode 100644 test/CodeGen/AArch64/dp2.ll create mode 100644 
test/CodeGen/AArch64/elf-extern.ll create mode 100644 test/CodeGen/AArch64/extern-weak.ll create mode 100644 test/CodeGen/AArch64/extract.ll create mode 100644 test/CodeGen/AArch64/fastcc-reserved.ll create mode 100644 test/CodeGen/AArch64/fastcc.ll create mode 100644 test/CodeGen/AArch64/fcmp.ll create mode 100644 test/CodeGen/AArch64/fcvt-fixed.ll create mode 100644 test/CodeGen/AArch64/fcvt-int.ll create mode 100644 test/CodeGen/AArch64/flags-multiuse.ll create mode 100644 test/CodeGen/AArch64/floatdp_1source.ll create mode 100644 test/CodeGen/AArch64/floatdp_2source.ll create mode 100644 test/CodeGen/AArch64/fp-cond-sel.ll create mode 100644 test/CodeGen/AArch64/fp-dp3.ll create mode 100644 test/CodeGen/AArch64/fp128-folding.ll create mode 100644 test/CodeGen/AArch64/fp128.ll create mode 100644 test/CodeGen/AArch64/fpimm.ll create mode 100644 test/CodeGen/AArch64/func-argpassing.ll create mode 100644 test/CodeGen/AArch64/func-calls.ll create mode 100644 test/CodeGen/AArch64/global-alignment.ll create mode 100644 test/CodeGen/AArch64/got-abuse.ll create mode 100644 test/CodeGen/AArch64/i128-align.ll create mode 100644 test/CodeGen/AArch64/illegal-float-ops.ll create mode 100644 test/CodeGen/AArch64/init-array.ll create mode 100644 test/CodeGen/AArch64/inline-asm-constraints-badI.ll create mode 100644 test/CodeGen/AArch64/inline-asm-constraints-badK.ll create mode 100644 test/CodeGen/AArch64/inline-asm-constraints-badK2.ll create mode 100644 test/CodeGen/AArch64/inline-asm-constraints-badL.ll create mode 100644 test/CodeGen/AArch64/inline-asm-constraints.ll create mode 100644 test/CodeGen/AArch64/inline-asm-modifiers.ll create mode 100644 test/CodeGen/AArch64/jump-table.ll create mode 100644 test/CodeGen/AArch64/large-frame.ll create mode 100644 test/CodeGen/AArch64/ldst-regoffset.ll create mode 100644 test/CodeGen/AArch64/ldst-unscaledimm.ll create mode 100644 test/CodeGen/AArch64/ldst-unsignedimm.ll create mode 100644 test/CodeGen/AArch64/lit.local.cfg create 
mode 100644 test/CodeGen/AArch64/literal_pools.ll create mode 100644 test/CodeGen/AArch64/local_vars.ll create mode 100644 test/CodeGen/AArch64/logical-imm.ll create mode 100644 test/CodeGen/AArch64/logical_shifted_reg.ll create mode 100644 test/CodeGen/AArch64/logical_shifted_reg.s create mode 100644 test/CodeGen/AArch64/movw-consts.ll create mode 100644 test/CodeGen/AArch64/pic-eh-stubs.ll create mode 100644 test/CodeGen/AArch64/regress-bitcast-formals.ll create mode 100644 test/CodeGen/AArch64/regress-f128csel-flags.ll create mode 100644 test/CodeGen/AArch64/regress-tail-livereg.ll create mode 100644 test/CodeGen/AArch64/regress-tblgen-chains.ll create mode 100644 test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll create mode 100644 test/CodeGen/AArch64/regress-wzr-allocatable.ll create mode 100644 test/CodeGen/AArch64/setcc-takes-i32.ll create mode 100644 test/CodeGen/AArch64/sibling-call.ll create mode 100644 test/CodeGen/AArch64/sincos-expansion.ll create mode 100644 test/CodeGen/AArch64/tail-call.ll create mode 100644 test/CodeGen/AArch64/tls-dynamic-together.ll create mode 100644 test/CodeGen/AArch64/tls-dynamics.ll create mode 100644 test/CodeGen/AArch64/tls-execs.ll create mode 100644 test/CodeGen/AArch64/tst-br.ll create mode 100644 test/CodeGen/AArch64/variadic.ll create mode 100644 test/CodeGen/AArch64/zero-reg.ll delete mode 100644 test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/ARM/2012-08-23-legalize-vmull.ll create mode 100644 test/CodeGen/ARM/2012-11-14-subs_carry.ll create mode 100644 test/CodeGen/ARM/2013-01-21-PR14992.ll create mode 100644 test/CodeGen/ARM/2013-02-27-expand-vfma.ll create mode 100644 test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll create mode 100644 test/CodeGen/ARM/DbgValueOtherTargets.test create mode 100644 test/CodeGen/ARM/MergeConsecutiveStores.ll create mode 100644 test/CodeGen/ARM/PR15053.ll create mode 100644 test/CodeGen/ARM/a15-SD-dep.ll create mode 100644 
test/CodeGen/ARM/a15-partial-update.ll create mode 100644 test/CodeGen/ARM/alloc-no-stack-realign.ll create mode 100644 test/CodeGen/ARM/arm-ttype-target2.ll create mode 100644 test/CodeGen/ARM/eh-dispcont.ll create mode 100644 test/CodeGen/ARM/ehabi-filters.ll create mode 100644 test/CodeGen/ARM/ehabi-mc-cantunwind.ll create mode 100644 test/CodeGen/ARM/ehabi-mc-section-group.ll create mode 100644 test/CodeGen/ARM/ehabi-mc-section.ll create mode 100644 test/CodeGen/ARM/ehabi-mc-sh_link.ll create mode 100644 test/CodeGen/ARM/ehabi-mc.ll create mode 100644 test/CodeGen/ARM/ehabi-no-landingpad.ll create mode 100644 test/CodeGen/ARM/extload-knownzero.ll create mode 100644 test/CodeGen/ARM/fabs-neon.ll create mode 100644 test/CodeGen/ARM/global-merge-addrspace.ll create mode 100644 test/CodeGen/ARM/indirect-reg-input.ll create mode 100644 test/CodeGen/ARM/inlineasm-64bit.ll create mode 100644 test/CodeGen/ARM/invoke-donothing-assert.ll create mode 100644 test/CodeGen/ARM/memset-inline.ll create mode 100644 test/CodeGen/ARM/neon-spfp.ll create mode 100644 test/CodeGen/ARM/neon_cmp.ll create mode 100644 test/CodeGen/ARM/neon_fpconv.ll create mode 100644 test/CodeGen/ARM/popcnt.ll delete mode 100644 test/CodeGen/ARM/reg_asc_order.ll create mode 100644 test/CodeGen/ARM/ret_sret_vector.ll create mode 100644 test/CodeGen/ARM/sjlj-prepare-critical-edge.ll create mode 100644 test/CodeGen/ARM/vfloatintrinsics.ll create mode 100644 test/CodeGen/ARM/zextload_demandedbits.ll delete mode 100644 test/CodeGen/CellSPU/2009-01-01-BrCond.ll delete mode 100644 test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll delete mode 100644 test/CodeGen/CellSPU/and_ops.ll delete mode 100644 test/CodeGen/CellSPU/arg_ret.ll delete mode 100644 test/CodeGen/CellSPU/bigstack.ll delete mode 100644 test/CodeGen/CellSPU/bss.ll delete mode 100644 test/CodeGen/CellSPU/call.ll delete mode 100644 test/CodeGen/CellSPU/crash.ll delete mode 100644 test/CodeGen/CellSPU/ctpop.ll delete mode 100644 
test/CodeGen/CellSPU/div_ops.ll delete mode 100644 test/CodeGen/CellSPU/dp_farith.ll delete mode 100644 test/CodeGen/CellSPU/eqv.ll delete mode 100644 test/CodeGen/CellSPU/extract_elt.ll delete mode 100644 test/CodeGen/CellSPU/fcmp32.ll delete mode 100644 test/CodeGen/CellSPU/fcmp64.ll delete mode 100644 test/CodeGen/CellSPU/fdiv.ll delete mode 100644 test/CodeGen/CellSPU/fneg-fabs.ll delete mode 100644 test/CodeGen/CellSPU/i64ops.ll delete mode 100644 test/CodeGen/CellSPU/i8ops.ll delete mode 100644 test/CodeGen/CellSPU/icmp16.ll delete mode 100644 test/CodeGen/CellSPU/icmp32.ll delete mode 100644 test/CodeGen/CellSPU/icmp64.ll delete mode 100644 test/CodeGen/CellSPU/icmp8.ll delete mode 100644 test/CodeGen/CellSPU/immed16.ll delete mode 100644 test/CodeGen/CellSPU/immed32.ll delete mode 100644 test/CodeGen/CellSPU/immed64.ll delete mode 100644 test/CodeGen/CellSPU/int2fp.ll delete mode 100644 test/CodeGen/CellSPU/intrinsics_branch.ll delete mode 100644 test/CodeGen/CellSPU/intrinsics_float.ll delete mode 100644 test/CodeGen/CellSPU/intrinsics_logical.ll delete mode 100644 test/CodeGen/CellSPU/jumptable.ll delete mode 100644 test/CodeGen/CellSPU/loads.ll delete mode 100644 test/CodeGen/CellSPU/mul-with-overflow.ll delete mode 100644 test/CodeGen/CellSPU/mul_ops.ll delete mode 100644 test/CodeGen/CellSPU/nand.ll delete mode 100644 test/CodeGen/CellSPU/or_ops.ll delete mode 100644 test/CodeGen/CellSPU/private.ll delete mode 100644 test/CodeGen/CellSPU/rotate_ops.ll delete mode 100644 test/CodeGen/CellSPU/select_bits.ll delete mode 100644 test/CodeGen/CellSPU/sext128.ll delete mode 100644 test/CodeGen/CellSPU/shift_ops.ll delete mode 100644 test/CodeGen/CellSPU/shuffles.ll delete mode 100644 test/CodeGen/CellSPU/sp_farith.ll delete mode 100644 test/CodeGen/CellSPU/stores.ll delete mode 100644 test/CodeGen/CellSPU/storestruct.ll delete mode 100644 test/CodeGen/CellSPU/struct_1.ll delete mode 100644 test/CodeGen/CellSPU/sub_ops.ll delete mode 100644 
test/CodeGen/CellSPU/trunc.ll delete mode 100644 test/CodeGen/CellSPU/useful-harnesses/README.txt delete mode 100644 test/CodeGen/CellSPU/useful-harnesses/i32operations.c delete mode 100644 test/CodeGen/CellSPU/useful-harnesses/i64operations.c delete mode 100644 test/CodeGen/CellSPU/useful-harnesses/i64operations.h delete mode 100644 test/CodeGen/CellSPU/useful-harnesses/vecoperations.c delete mode 100644 test/CodeGen/CellSPU/v2f32.ll delete mode 100644 test/CodeGen/CellSPU/v2i32.ll delete mode 100644 test/CodeGen/CellSPU/vec_const.ll delete mode 100644 test/CodeGen/CellSPU/vecinsert.ll create mode 100644 test/CodeGen/Generic/2013-03-20-APFloatCrash.ll create mode 100644 test/CodeGen/Generic/dag-combine-crash.ll create mode 100644 test/CodeGen/Generic/inline-asm-mem-clobber.ll create mode 100644 test/CodeGen/Hexagon/absaddr-store.ll create mode 100644 test/CodeGen/Hexagon/adde.ll create mode 100644 test/CodeGen/Hexagon/ashift-left-right.ll create mode 100644 test/CodeGen/Hexagon/block-addr.ll create mode 100644 test/CodeGen/Hexagon/cext-check.ll create mode 100644 test/CodeGen/Hexagon/cext-valid-packet1.ll create mode 100644 test/CodeGen/Hexagon/cext-valid-packet2.ll create mode 100644 test/CodeGen/Hexagon/cmp-to-genreg.ll create mode 100644 test/CodeGen/Hexagon/cmp-to-predreg.ll create mode 100644 test/CodeGen/Hexagon/cmp_pred.ll create mode 100644 test/CodeGen/Hexagon/cmp_pred_reg.ll create mode 100644 test/CodeGen/Hexagon/cmpb_pred.ll create mode 100644 test/CodeGen/Hexagon/combine_ir.ll create mode 100644 test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll create mode 100644 test/CodeGen/Hexagon/gp-plus-offset-load.ll create mode 100644 test/CodeGen/Hexagon/gp-plus-offset-store.ll create mode 100644 test/CodeGen/Hexagon/gp-rel.ll create mode 100644 test/CodeGen/Hexagon/hwloop-cleanup.ll create mode 100644 test/CodeGen/Hexagon/hwloop-const.ll create mode 100644 test/CodeGen/Hexagon/hwloop-dbg.ll create mode 100644 test/CodeGen/Hexagon/hwloop-le.ll create mode 100644 
test/CodeGen/Hexagon/hwloop-lt.ll create mode 100644 test/CodeGen/Hexagon/hwloop-lt1.ll create mode 100644 test/CodeGen/Hexagon/hwloop-ne.ll create mode 100644 test/CodeGen/Hexagon/i16_VarArg.ll create mode 100644 test/CodeGen/Hexagon/i1_VarArg.ll create mode 100644 test/CodeGen/Hexagon/i8_VarArg.ll create mode 100644 test/CodeGen/Hexagon/idxload-with-zero-offset.ll create mode 100644 test/CodeGen/Hexagon/indirect-br.ll create mode 100644 test/CodeGen/Hexagon/memops.ll create mode 100644 test/CodeGen/Hexagon/memops1.ll create mode 100644 test/CodeGen/Hexagon/memops2.ll create mode 100644 test/CodeGen/Hexagon/memops3.ll create mode 100644 test/CodeGen/Hexagon/misaligned-access.ll create mode 100644 test/CodeGen/Hexagon/postinc-load.ll create mode 100644 test/CodeGen/Hexagon/postinc-store.ll create mode 100644 test/CodeGen/Hexagon/pred-absolute-store.ll create mode 100644 test/CodeGen/Hexagon/predicate-copy.ll create mode 100644 test/CodeGen/Hexagon/sube.ll create mode 100644 test/CodeGen/Hexagon/validate-offset.ll create mode 100644 test/CodeGen/Hexagon/zextloadi1.ll create mode 100644 test/CodeGen/Inputs/DbgValueOtherTargets.ll delete mode 100644 test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/MBlaze/DbgValueOtherTargets.test delete mode 100644 test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/MSP430/DbgValueOtherTargets.test create mode 100644 test/CodeGen/MSP430/byval.ll create mode 100644 test/CodeGen/MSP430/vararg.ll delete mode 100644 test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll create mode 100644 test/CodeGen/Mips/DbgValueOtherTargets.test create mode 100644 test/CodeGen/Mips/addi.ll create mode 100644 test/CodeGen/Mips/addressing-mode.ll create mode 100644 test/CodeGen/Mips/align16.ll create mode 100644 test/CodeGen/Mips/dsp-patterns.ll create mode 100644 test/CodeGen/Mips/eh-return32.ll create mode 100644 
test/CodeGen/Mips/eh-return64.ll create mode 100644 test/CodeGen/Mips/ex2.ll create mode 100644 test/CodeGen/Mips/fp16static.ll create mode 100644 test/CodeGen/Mips/gpreg-lazy-binding.ll create mode 100644 test/CodeGen/Mips/hf16_1.ll create mode 100644 test/CodeGen/Mips/jtstat.ll create mode 100644 test/CodeGen/Mips/largefr1.ll create mode 100644 test/CodeGen/Mips/mips16ex.ll create mode 100644 test/CodeGen/Mips/mips16fpe.ll create mode 100644 test/CodeGen/Mips/mips64-f128-call.ll create mode 100644 test/CodeGen/Mips/mips64-f128.ll create mode 100644 test/CodeGen/Mips/mips64-libcall.ll delete mode 100644 test/CodeGen/Mips/return-vector-float4.ll create mode 100644 test/CodeGen/Mips/selTBteqzCmpi.ll create mode 100644 test/CodeGen/Mips/selTBtnezCmpi.ll create mode 100644 test/CodeGen/Mips/selTBtnezSlti.ll create mode 100644 test/CodeGen/Mips/seleq.ll create mode 100644 test/CodeGen/Mips/seleqk.ll create mode 100644 test/CodeGen/Mips/selgek.ll create mode 100644 test/CodeGen/Mips/selgt.ll create mode 100644 test/CodeGen/Mips/selle.ll create mode 100644 test/CodeGen/Mips/selltk.ll create mode 100644 test/CodeGen/Mips/selne.ll create mode 100644 test/CodeGen/Mips/selnek.ll create mode 100644 test/CodeGen/Mips/vector-setcc.ll delete mode 100644 test/CodeGen/NVPTX/arithmetic-fp-sm10.ll delete mode 100644 test/CodeGen/NVPTX/convert-int-sm10.ll create mode 100644 test/CodeGen/NVPTX/intrin-nocapture.ll create mode 100644 test/CodeGen/NVPTX/nvvm-reflect.ll create mode 100644 test/CodeGen/NVPTX/sched1.ll create mode 100644 test/CodeGen/NVPTX/sched2.ll delete mode 100644 test/CodeGen/NVPTX/sm-version-10.ll delete mode 100644 test/CodeGen/NVPTX/sm-version-11.ll delete mode 100644 test/CodeGen/NVPTX/sm-version-12.ll delete mode 100644 test/CodeGen/NVPTX/sm-version-13.ll create mode 100644 test/CodeGen/NVPTX/tuple-literal.ll create mode 100644 test/CodeGen/NVPTX/vector-args.ll create mode 100644 test/CodeGen/NVPTX/vector-compare.ll create mode 100644 
test/CodeGen/NVPTX/vector-loads.ll create mode 100644 test/CodeGen/NVPTX/vector-select.ll delete mode 100644 test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/PowerPC/2012-11-16-mischedcall.ll create mode 100644 test/CodeGen/PowerPC/DbgValueOtherTargets.test create mode 100644 test/CodeGen/PowerPC/a2q-stackalign.ll create mode 100644 test/CodeGen/PowerPC/a2q.ll create mode 100644 test/CodeGen/PowerPC/allocate-r0.ll create mode 100644 test/CodeGen/PowerPC/anon_aggr.ll create mode 100644 test/CodeGen/PowerPC/asym-regclass-copy.ll create mode 100644 test/CodeGen/PowerPC/complex-return.ll create mode 100644 test/CodeGen/PowerPC/cr-spills.ll create mode 100644 test/CodeGen/PowerPC/ctr-cleanup.ll create mode 100644 test/CodeGen/PowerPC/dcbt-sched.ll create mode 100644 test/CodeGen/PowerPC/float-asmprint.ll create mode 100644 test/CodeGen/PowerPC/float-to-int.ll create mode 100644 test/CodeGen/PowerPC/frame-size.ll create mode 100644 test/CodeGen/PowerPC/frameaddr.ll create mode 100644 test/CodeGen/PowerPC/i32-to-float.ll create mode 100644 test/CodeGen/PowerPC/i64-to-float.ll create mode 100644 test/CodeGen/PowerPC/in-asm-f64-reg.ll create mode 100644 test/CodeGen/PowerPC/load-shift-combine.ll create mode 100644 test/CodeGen/PowerPC/mcm-1.ll create mode 100644 test/CodeGen/PowerPC/mcm-10.ll create mode 100644 test/CodeGen/PowerPC/mcm-11.ll create mode 100644 test/CodeGen/PowerPC/mcm-12.ll create mode 100644 test/CodeGen/PowerPC/mcm-2.ll create mode 100644 test/CodeGen/PowerPC/mcm-3.ll create mode 100644 test/CodeGen/PowerPC/mcm-4.ll create mode 100644 test/CodeGen/PowerPC/mcm-5.ll create mode 100644 test/CodeGen/PowerPC/mcm-6.ll create mode 100644 test/CodeGen/PowerPC/mcm-7.ll create mode 100644 test/CodeGen/PowerPC/mcm-8.ll create mode 100644 test/CodeGen/PowerPC/mcm-9.ll create mode 100644 test/CodeGen/PowerPC/mcm-default.ll create mode 100644 test/CodeGen/PowerPC/mcm-obj-2.ll create mode 100644 test/CodeGen/PowerPC/mcm-obj.ll 
create mode 100644 test/CodeGen/PowerPC/misched-inorder-latency.ll create mode 100644 test/CodeGen/PowerPC/negctr.ll create mode 100644 test/CodeGen/PowerPC/popcnt.ll create mode 100644 test/CodeGen/PowerPC/pr15031.ll create mode 100644 test/CodeGen/PowerPC/pr15359.ll create mode 100644 test/CodeGen/PowerPC/pr15630.ll create mode 100644 test/CodeGen/PowerPC/pr15632.ll create mode 100644 test/CodeGen/PowerPC/pwr3-6x.ll create mode 100644 test/CodeGen/PowerPC/quadint-return.ll create mode 100644 test/CodeGen/PowerPC/r31.ll create mode 100644 test/CodeGen/PowerPC/recipest.ll create mode 100644 test/CodeGen/PowerPC/rounding-ops.ll create mode 100644 test/CodeGen/PowerPC/s000-alias-misched.ll create mode 100644 test/CodeGen/PowerPC/sdag-ppcf128.ll create mode 100644 test/CodeGen/PowerPC/sjlj.ll create mode 100644 test/CodeGen/PowerPC/stdux-constuse.ll create mode 100644 test/CodeGen/PowerPC/store-update.ll create mode 100644 test/CodeGen/PowerPC/svr4-redzone.ll create mode 100644 test/CodeGen/PowerPC/tls-2.ll create mode 100644 test/CodeGen/PowerPC/tls-gd-obj.ll create mode 100644 test/CodeGen/PowerPC/tls-gd.ll create mode 100644 test/CodeGen/PowerPC/tls-ie-obj.ll create mode 100644 test/CodeGen/PowerPC/tls-ie.ll create mode 100644 test/CodeGen/PowerPC/tls-ld-2.ll create mode 100644 test/CodeGen/PowerPC/tls-ld-obj.ll create mode 100644 test/CodeGen/PowerPC/tls-ld.ll create mode 100644 test/CodeGen/PowerPC/unal4-std.ll create mode 100644 test/CodeGen/PowerPC/unaligned.ll create mode 100644 test/CodeGen/PowerPC/vaddsplat.ll create mode 100644 test/CodeGen/PowerPC/vec_rounding.ll create mode 100644 test/CodeGen/PowerPC/vec_select.ll create mode 100644 test/CodeGen/PowerPC/vrsave-spill.ll create mode 100644 test/CodeGen/R600/128bit-kernel-args.ll create mode 100644 test/CodeGen/R600/add.v4i32.ll create mode 100644 test/CodeGen/R600/alu-split.ll create mode 100644 test/CodeGen/R600/and.v4i32.ll create mode 100644 test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll 
create mode 100644 test/CodeGen/R600/disconnected-predset-break-bug.ll create mode 100644 test/CodeGen/R600/fabs.ll create mode 100644 test/CodeGen/R600/fadd.ll create mode 100644 test/CodeGen/R600/fadd.v4f32.ll create mode 100644 test/CodeGen/R600/fcmp-cnd.ll create mode 100644 test/CodeGen/R600/fcmp-cnde-int-args.ll create mode 100644 test/CodeGen/R600/fcmp.ll create mode 100644 test/CodeGen/R600/fdiv.v4f32.ll create mode 100644 test/CodeGen/R600/floor.ll create mode 100644 test/CodeGen/R600/fmad.ll create mode 100644 test/CodeGen/R600/fmax.ll create mode 100644 test/CodeGen/R600/fmin.ll create mode 100644 test/CodeGen/R600/fmul.ll create mode 100644 test/CodeGen/R600/fmul.v4f32.ll create mode 100644 test/CodeGen/R600/fsub.ll create mode 100644 test/CodeGen/R600/fsub.v4f32.ll create mode 100644 test/CodeGen/R600/i8_to_double_to_float.ll create mode 100644 test/CodeGen/R600/icmp-select-sete-reverse-args.ll create mode 100644 test/CodeGen/R600/jump_address.ll create mode 100644 test/CodeGen/R600/kcache-fold.ll create mode 100644 test/CodeGen/R600/legalizedag-bug-expand-setcc.ll create mode 100644 test/CodeGen/R600/lit.local.cfg create mode 100644 test/CodeGen/R600/literals.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.mul.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.tex.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.trunc.ll create mode 100644 test/CodeGen/R600/llvm.SI.fs.interp.constant.ll create mode 100644 test/CodeGen/R600/llvm.SI.sample.ll create mode 100644 test/CodeGen/R600/llvm.cos.ll create mode 100644 test/CodeGen/R600/llvm.pow.ll create mode 100644 test/CodeGen/R600/llvm.sin.ll create mode 100644 test/CodeGen/R600/load.constant_addrspace.f32.ll create mode 100644 test/CodeGen/R600/load.i8.ll create mode 100644 test/CodeGen/R600/lshl.ll create mode 100644 test/CodeGen/R600/lshr.ll create mode 100644 test/CodeGen/R600/mulhu.ll create mode 100644 test/CodeGen/R600/predicates.ll create mode 100644 test/CodeGen/R600/reciprocal.ll create mode 
100644 test/CodeGen/R600/schedule-fs-loop-nested-if.ll create mode 100644 test/CodeGen/R600/schedule-fs-loop-nested.ll create mode 100644 test/CodeGen/R600/schedule-fs-loop.ll create mode 100644 test/CodeGen/R600/schedule-if-2.ll create mode 100644 test/CodeGen/R600/schedule-if.ll create mode 100644 test/CodeGen/R600/schedule-vs-if-nested-loop.ll create mode 100644 test/CodeGen/R600/sdiv.ll create mode 100644 test/CodeGen/R600/selectcc-icmp-select-float.ll create mode 100644 test/CodeGen/R600/selectcc-opt.ll create mode 100644 test/CodeGen/R600/selectcc_cnde.ll create mode 100644 test/CodeGen/R600/selectcc_cnde_int.ll create mode 100644 test/CodeGen/R600/set-dx10.ll create mode 100644 test/CodeGen/R600/setcc.v4i32.ll create mode 100644 test/CodeGen/R600/seto.ll create mode 100644 test/CodeGen/R600/setuo.ll create mode 100644 test/CodeGen/R600/short-args.ll create mode 100644 test/CodeGen/R600/store.v4f32.ll create mode 100644 test/CodeGen/R600/store.v4i32.ll create mode 100644 test/CodeGen/R600/udiv.v4i32.ll create mode 100644 test/CodeGen/R600/unsupported-cc.ll create mode 100644 test/CodeGen/R600/urem.v4i32.ll create mode 100644 test/CodeGen/R600/vec4-expand.ll create mode 100644 test/CodeGen/SI/sanity.ll delete mode 100644 test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/SPARC/64bit.ll create mode 100644 test/CodeGen/SPARC/64cond.ll create mode 100644 test/CodeGen/SPARC/DbgValueOtherTargets.test delete mode 100644 test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/Thumb/DbgValueOtherTargets.test create mode 100644 test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll create mode 100644 test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll create mode 100644 test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll create mode 100644 test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll delete mode 100644 test/CodeGen/X86/2008-10-27-StackRealignment.ll delete mode 
100644 test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll delete mode 100644 test/CodeGen/X86/2010-08-10-DbgConstant.ll rename test/CodeGen/{Generic => X86}/2012-07-15-BuildVectorPromote.ll (84%) create mode 100644 test/CodeGen/X86/2012-11-28-merge-store-alias.ll create mode 100644 test/CodeGen/X86/2012-11-30-handlemove-dbg.ll create mode 100644 test/CodeGen/X86/2012-11-30-misched-dbg.ll create mode 100644 test/CodeGen/X86/2012-11-30-regpres-dbg.ll create mode 100644 test/CodeGen/X86/2012-12-06-python27-miscompile.ll create mode 100644 test/CodeGen/X86/2012-12-1-merge-multiple.ll create mode 100644 test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll create mode 100644 test/CodeGen/X86/2012-12-14-v8fp80-crash.ll create mode 100644 test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll create mode 100644 test/CodeGen/X86/2013-01-09-DAGCombineBug.ll create mode 100644 test/CodeGen/X86/2013-02-12-ShuffleToZext.ll create mode 100644 test/CodeGen/X86/2013-03-13-VEX-DestReg.ll create mode 100644 test/CodeGen/X86/DbgValueOtherTargets.test create mode 100644 test/CodeGen/X86/GC/erlang-gc.ll create mode 100644 test/CodeGen/X86/GC/ocaml-gc.ll create mode 100644 test/CodeGen/X86/WidenArith.ll create mode 100644 test/CodeGen/X86/atom-bypass-slow-division-64.ll create mode 100644 test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll create mode 100644 test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll create mode 100644 test/CodeGen/X86/atom-call-reg-indirect.ll create mode 100644 test/CodeGen/X86/atom-pad-short-functions.ll create mode 100644 test/CodeGen/X86/atomic-dagsched.ll create mode 100644 test/CodeGen/X86/cas.ll create mode 100644 test/CodeGen/X86/clobber-fi0.ll create mode 100644 test/CodeGen/X86/coalesce-implicitdef.ll create mode 100644 test/CodeGen/X86/coldcc64.ll create mode 100644 test/CodeGen/X86/dagcombine_unsafe_math.ll delete mode 100644 test/CodeGen/X86/dbg-value-inlined-parameter.ll create mode 100644 test/CodeGen/X86/fast-isel-args-fail.ll create mode 100644 
test/CodeGen/X86/fast-isel-args.ll create mode 100644 test/CodeGen/X86/fast-isel-constant.ll create mode 100644 test/CodeGen/X86/fast-isel-expect.ll create mode 100644 test/CodeGen/X86/float-asmprint.ll create mode 100644 test/CodeGen/X86/fold-load-vec.ll create mode 100644 test/CodeGen/X86/fold-vex.ll create mode 100644 test/CodeGen/X86/hipe-cc.ll create mode 100644 test/CodeGen/X86/hipe-cc64.ll create mode 100644 test/CodeGen/X86/hipe-prologue.ll create mode 100644 test/CodeGen/X86/imul64-lea.ll create mode 100644 test/CodeGen/X86/memset-sse-stack-realignment.ll create mode 100644 test/CodeGen/X86/misched-crash.ll create mode 100644 test/CodeGen/X86/misched-matmul.ll create mode 100644 test/CodeGen/X86/misched-matrix.ll create mode 100644 test/CodeGen/X86/no-cmov.ll create mode 100644 test/CodeGen/X86/pmovsx-inreg.ll create mode 100644 test/CodeGen/X86/pr10475.ll create mode 100644 test/CodeGen/X86/pr10499.ll create mode 100644 test/CodeGen/X86/pr10523.ll create mode 100644 test/CodeGen/X86/pr10524.ll create mode 100644 test/CodeGen/X86/pr10525.ll create mode 100644 test/CodeGen/X86/pr10526.ll create mode 100644 test/CodeGen/X86/pr11998.ll create mode 100644 test/CodeGen/X86/pr14562.ll create mode 100644 test/CodeGen/X86/pr15267.ll create mode 100644 test/CodeGen/X86/pr15296.ll create mode 100644 test/CodeGen/X86/pr15309.ll create mode 100644 test/CodeGen/X86/pre-ra-sched.ll create mode 100644 test/CodeGen/X86/psubus.ll create mode 100644 test/CodeGen/X86/rdseed.ll create mode 100644 test/CodeGen/X86/rip-rel-lea.ll create mode 100644 test/CodeGen/X86/sandybridge-loads.ll create mode 100644 test/CodeGen/X86/sincos-opt.ll create mode 100644 test/CodeGen/X86/sse2-mul.ll create mode 100644 test/CodeGen/X86/stack-align-memcpy.ll create mode 100644 test/CodeGen/X86/stack-update-frame-opcode.ll create mode 100644 test/CodeGen/X86/subtarget-feature-change.ll delete mode 100644 test/CodeGen/X86/thiscall-struct-return.ll create mode 100644 test/CodeGen/X86/v8i1-masks.ll 
create mode 100644 test/CodeGen/X86/vec_align_i256.ll create mode 100644 test/CodeGen/X86/vec_sdiv_to_shift.ll delete mode 100644 test/CodeGen/X86/vec_splat-4.ll create mode 100644 test/CodeGen/X86/vselect-minmax.ll create mode 100644 test/CodeGen/X86/wide-fma-contraction.ll create mode 100644 test/CodeGen/X86/x86-64-ptr-arg-simple.ll create mode 100644 test/CodeGen/X86/xtest.ll delete mode 100644 test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll create mode 100644 test/CodeGen/XCore/DbgValueOtherTargets.test create mode 100644 test/CodeGen/XCore/aliases.ll create mode 100644 test/DebugInfo/AArch64/cfi-frame.ll create mode 100644 test/DebugInfo/AArch64/dwarfdump.ll create mode 100644 test/DebugInfo/AArch64/eh_frame.ll create mode 100644 test/DebugInfo/AArch64/eh_frame_personality.ll create mode 100644 test/DebugInfo/AArch64/lit.local.cfg create mode 100644 test/DebugInfo/AArch64/variable-loc.ll create mode 100644 test/DebugInfo/Inputs/dwarfdump-inl-test.cc create mode 100644 test/DebugInfo/Inputs/dwarfdump-inl-test.h create mode 100644 test/DebugInfo/Inputs/dwarfdump-pubnames.cc create mode 100644 test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64 create mode 100644 test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c create mode 100644 test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o create mode 100644 test/DebugInfo/Inputs/dwarfdump-test.cc create mode 100644 test/DebugInfo/Inputs/dwarfdump-test2-helper.cc create mode 100644 test/DebugInfo/Inputs/dwarfdump-test2-main.cc create mode 100644 test/DebugInfo/Inputs/dwarfdump-test3-decl.h create mode 100644 test/DebugInfo/Inputs/dwarfdump-test3-decl2.h create mode 100644 test/DebugInfo/Inputs/dwarfdump-test3.cc delete mode 100755 test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 create mode 100755 test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 space create mode 100644 test/DebugInfo/Inputs/dwarfdump-test4-decl.h create mode 100644 test/DebugInfo/Inputs/dwarfdump-test4-part1.cc create mode 100644 
test/DebugInfo/Inputs/dwarfdump-test4-part2.cc rename test/{CodeGen/CellSPU/useful-harnesses => DebugInfo/Inputs}/lit.local.cfg (100%) create mode 100644 test/DebugInfo/Inputs/test-inline.o create mode 100644 test/DebugInfo/Inputs/test-parameters.o create mode 100644 test/DebugInfo/X86/2010-08-10-DbgConstant.ll create mode 100644 test/DebugInfo/X86/dbg-value-inlined-parameter.ll create mode 100644 test/DebugInfo/X86/debug-info-block-captured-self.ll create mode 100644 test/DebugInfo/X86/debug-info-blocks.ll create mode 100644 test/DebugInfo/X86/debug-info-static-member.ll create mode 100644 test/DebugInfo/X86/empty-and-one-elem-array.ll create mode 100644 test/DebugInfo/X86/empty-array.ll create mode 100644 test/DebugInfo/X86/fission-cu.ll create mode 100644 test/DebugInfo/X86/line-info.ll create mode 100644 test/DebugInfo/X86/main-file-name.s create mode 100644 test/DebugInfo/X86/misched-dbg-value.ll create mode 100644 test/DebugInfo/X86/multiple-at-const-val.ll create mode 100644 test/DebugInfo/X86/nondefault-subrange-array.ll create mode 100644 test/DebugInfo/X86/pr13303.ll create mode 100644 test/DebugInfo/X86/stmt-list-multiple-compile-units.ll create mode 100644 test/DebugInfo/X86/subrange-type.ll create mode 100644 test/DebugInfo/X86/union-template.ll create mode 100644 test/DebugInfo/X86/vector.ll create mode 100644 test/DebugInfo/debuglineinfo.test create mode 100644 test/DebugInfo/dwarf-public-names.ll create mode 100644 test/DebugInfo/dwarfdump-debug-frame-simple.test create mode 100644 test/DebugInfo/dwarfdump-dump-flags.test create mode 100644 test/DebugInfo/dwarfdump-pubnames.test create mode 100644 test/DebugInfo/llvm-symbolizer.test create mode 100644 test/DebugInfo/member-pointers.ll create mode 100644 test/DebugInfo/namespace.ll delete mode 100644 test/DebugInfo/printdbginfo2.ll create mode 100644 test/DebugInfo/two-cus-from-same-file.ll create mode 100644 test/ExecutionEngine/MCJIT/simpletest-remote.ll create mode 100644 
test/ExecutionEngine/MCJIT/stubs-remote.ll create mode 100644 test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll create mode 100644 test/ExecutionEngine/MCJIT/test-data-align-remote.ll create mode 100644 test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll create mode 100644 test/ExecutionEngine/MCJIT/test-global-ctors.ll create mode 100644 test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll create mode 100644 test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll create mode 100644 test/ExecutionEngine/test-interp-vec-loadstore.ll create mode 100644 test/Feature/attributes.ll create mode 100644 test/FileCheck/dos-style-eol.txt create mode 100644 test/FileCheck/lit.local.cfg create mode 100644 test/FileCheck/next-no-match.txt create mode 100644 test/FileCheck/regex-brackets.txt create mode 100644 test/FileCheck/regex-no-match.txt create mode 100644 test/FileCheck/simple-var-capture.txt create mode 100644 test/FileCheck/two-checks-for-same-match.txt create mode 100644 test/FileCheck/var-ref-same-line.txt create mode 100644 test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll create mode 100644 test/Instrumentation/AddressSanitizer/debug_info.ll create mode 100644 test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll create mode 100644 test/Instrumentation/AddressSanitizer/lifetime.ll create mode 100644 test/Instrumentation/MemorySanitizer/lit.local.cfg create mode 100644 test/Instrumentation/MemorySanitizer/msan_basic.ll create mode 100644 test/Instrumentation/MemorySanitizer/unreachable.ll create mode 100644 test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll create mode 100644 test/Instrumentation/ThreadSanitizer/vptr_read.ll create mode 100644 test/JitListener/lit.local.cfg create mode 100644 test/JitListener/test-common-symbols.ll create mode 100644 test/JitListener/test-inline.ll create mode 100644 test/JitListener/test-parameters.ll create mode 100644 test/Linker/DbgDeclare.ll create mode 100644 
test/Linker/DbgDeclare2.ll create mode 100644 test/Linker/module-flags-7-a.ll create mode 100644 test/Linker/module-flags-7-b.ll create mode 100644 test/Linker/module-flags-8-a.ll create mode 100644 test/Linker/module-flags-8-b.ll create mode 100644 test/MC/AArch64/basic-a64-diagnostics.s create mode 100644 test/MC/AArch64/basic-a64-instructions.s create mode 100644 test/MC/AArch64/elf-globaladdress.ll create mode 100644 test/MC/AArch64/elf-objdump.s create mode 100644 test/MC/AArch64/elf-reloc-addsubimm.s create mode 100644 test/MC/AArch64/elf-reloc-condbr.s create mode 100644 test/MC/AArch64/elf-reloc-ldrlit.s create mode 100644 test/MC/AArch64/elf-reloc-ldstunsimm.s create mode 100644 test/MC/AArch64/elf-reloc-movw.s create mode 100644 test/MC/AArch64/elf-reloc-pcreladdressing.s create mode 100644 test/MC/AArch64/elf-reloc-tstb.s create mode 100644 test/MC/AArch64/elf-reloc-uncondbrimm.s create mode 100644 test/MC/AArch64/gicv3-regs-diagnostics.s create mode 100644 test/MC/AArch64/gicv3-regs.s create mode 100644 test/MC/AArch64/lit.local.cfg create mode 100644 test/MC/AArch64/mapping-across-sections.s create mode 100644 test/MC/AArch64/mapping-within-section.s create mode 100644 test/MC/AArch64/tls-relocs.s create mode 100644 test/MC/AArch64/trace-regs-diagnostics.s create mode 100644 test/MC/AArch64/trace-regs.s create mode 100644 test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s create mode 100644 test/MC/ARM/AlignedBundling/group-bundle-arm.s create mode 100644 test/MC/ARM/AlignedBundling/lit.local.cfg create mode 100644 test/MC/ARM/AlignedBundling/pad-align-to-bundle-end.s create mode 100644 test/MC/ARM/data-in-code.ll create mode 100644 test/MC/ARM/elf-eflags-eabi-cg.ll create mode 100644 test/MC/ARM/mapping-within-section.s create mode 100644 test/MC/ARM/multi-section-mapping.s create mode 100644 test/MC/ARM/relocated-mapping.s create mode 100644 test/MC/AsmParser/align_invalid.s create mode 100644 test/MC/AsmParser/section_names.s create mode 100644 
test/MC/COFF/weak-symbol-section-specification.ll create mode 100644 test/MC/Disassembler/AArch64/a64-ignored-fields.txt create mode 100644 test/MC/Disassembler/AArch64/basic-a64-instructions.txt create mode 100644 test/MC/Disassembler/AArch64/basic-a64-undefined.txt create mode 100644 test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt create mode 100644 test/MC/Disassembler/AArch64/gicv3-regs.txt create mode 100644 test/MC/Disassembler/AArch64/ldp-offset-predictable.txt create mode 100644 test/MC/Disassembler/AArch64/ldp-postind.predictable.txt create mode 100644 test/MC/Disassembler/AArch64/ldp-preind.predictable.txt create mode 100644 test/MC/Disassembler/AArch64/lit.local.cfg create mode 100644 test/MC/Disassembler/AArch64/trace-regs.txt create mode 100644 test/MC/Disassembler/ARM/hex-immediates.txt create mode 100644 test/MC/Disassembler/ARM/unpredictable-BFI.txt delete mode 100644 test/MC/Disassembler/X86/enhanced.txt create mode 100644 test/MC/Disassembler/X86/hex-immediates.txt create mode 100644 test/MC/Disassembler/X86/intel-syntax-32.txt create mode 100644 test/MC/Disassembler/XCore/lit.local.cfg create mode 100644 test/MC/Disassembler/XCore/xcore.txt create mode 100644 test/MC/ELF/cfi-register.s create mode 100644 test/MC/ELF/cfi-undefined.s create mode 100644 test/MC/ELF/comp-dir.s create mode 100644 test/MC/ELF/many-sections-2.s create mode 100644 test/MC/ELF/relax-all-flag.s create mode 100644 test/MC/MachO/bad-dollar.s create mode 100644 test/MC/MachO/bad-macro.s create mode 100644 test/MC/MachO/gen-dwarf-producer.s create mode 100644 test/MC/MachO/linker-option-1.s create mode 100644 test/MC/MachO/linker-option-2.s create mode 100644 test/MC/MachO/linker-options.ll create mode 100644 test/MC/Mips/eh-frame.s create mode 100644 test/MC/Mips/elf-gprel-32-64.ll create mode 100644 test/MC/Mips/elf-reginfo.ll create mode 100644 test/MC/Mips/elf_eflags.ll create mode 100644 test/MC/Mips/elf_st_other.ll create mode 100644 
test/MC/Mips/hilo-addressing.s create mode 100644 test/MC/Mips/mips64-alu-instructions.s create mode 100644 test/MC/Mips/mips_gprel16.ll create mode 100644 test/MC/Mips/nabi-regs.s create mode 100644 test/MC/Mips/set-at-directive.s create mode 100644 test/MC/X86/AlignedBundling/align-mode-argument-error.s create mode 100644 test/MC/X86/AlignedBundling/asm-printing-bundle-directives.s create mode 100644 test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s create mode 100644 test/MC/X86/AlignedBundling/autogen-inst-offset-padding.s create mode 100644 test/MC/X86/AlignedBundling/bundle-group-too-large-error.s create mode 100644 test/MC/X86/AlignedBundling/bundle-lock-option-error.s create mode 100644 test/MC/X86/AlignedBundling/different-sections.s create mode 100644 test/MC/X86/AlignedBundling/lit.local.cfg create mode 100644 test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s create mode 100644 test/MC/X86/AlignedBundling/long-nop-pad.s create mode 100644 test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s create mode 100644 test/MC/X86/AlignedBundling/pad-bundle-groups.s create mode 100644 test/MC/X86/AlignedBundling/relax-at-bundle-end.s create mode 100644 test/MC/X86/AlignedBundling/relax-in-bundle-group.s create mode 100644 test/MC/X86/AlignedBundling/single-inst-bundling.s create mode 100644 test/MC/X86/AlignedBundling/switch-section-locked-error.s create mode 100644 test/MC/X86/AlignedBundling/unlock-without-lock-error.s create mode 100644 test/MC/X86/fde-reloc.s create mode 100644 test/MC/X86/gnux32-dwarf-gen.s create mode 100644 test/MC/X86/intel-syntax-hex.s create mode 100644 test/MC/X86/shuffle-comments.s create mode 100644 test/MC/X86/x86_64-rand-encoding.s create mode 100644 test/MC/X86/x86_long_nop.s create mode 100644 test/Object/ARM/symbol-addr.ll create mode 100755 test/Object/Inputs/coff_archive.lib create mode 100644 test/Object/Inputs/liblong_filenames.a create mode 100644 test/Object/Inputs/libsimple_archive.a create mode 
100644 test/Object/Inputs/macho-text-sections.macho-x86_64 create mode 100644 test/Object/Inputs/program-headers.elf-i386 create mode 100644 test/Object/Inputs/program-headers.elf-x86-64 create mode 100644 test/Object/Inputs/trivial-object-test.elf-mips64el create mode 100644 test/Object/X86/macho-text-sections.test create mode 100644 test/Object/archive-long-index.test create mode 100644 test/Object/coff-archive.test create mode 100644 test/Object/obj2yaml.test create mode 100644 test/Object/objdump-private-headers.test create mode 100644 test/Object/readobj.test create mode 100644 test/Object/simple-archive.test create mode 100644 test/Object/yaml2obj-readobj.test create mode 100644 test/Other/extract-linkonce.ll create mode 100644 test/TableGen/math.td create mode 100644 test/Transforms/BBVectorize/X86/pr15289.ll create mode 100644 test/Transforms/BBVectorize/X86/simple-int.ll create mode 100644 test/Transforms/EarlyCSE/floatingpoint.ll create mode 100644 test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll create mode 100644 test/Transforms/FunctionAttrs/annotate-1.ll create mode 100644 test/Transforms/FunctionAttrs/noreturn.ll create mode 100644 test/Transforms/GCOVProfiling/linkagename.ll create mode 100644 test/Transforms/GCOVProfiling/lit.local.cfg create mode 100644 test/Transforms/GCOVProfiling/version.ll create mode 100644 test/Transforms/GVN/MemdepMiscompile.ll create mode 100644 test/Transforms/GlobalOpt/crash-2.ll create mode 100644 test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll create mode 100644 test/Transforms/IndVarSimplify/dont-recompute.ll create mode 100644 test/Transforms/Inline/inline_minisize.ll create mode 100644 test/Transforms/Inline/inline_ssp.ll create mode 100644 test/Transforms/Inline/lifetime-no-datalayout.ll create mode 100644 test/Transforms/InstCombine/2009-02-11-NotInitialized.ll rename test/Transforms/{SimplifyLibCalls => InstCombine}/2010-05-30-memcpy-Struct.ll (78%) create mode 100644 
test/Transforms/InstCombine/2012-12-14-simp-vgep.ll create mode 100644 test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll create mode 100644 test/Transforms/InstCombine/abs-1.ll create mode 100644 test/Transforms/InstCombine/bitcast-bigendian.ll create mode 100644 test/Transforms/InstCombine/constant-expr-datalayout.ll create mode 100644 test/Transforms/InstCombine/cos-1.ll create mode 100644 test/Transforms/InstCombine/cos-2.ll rename test/Transforms/{SimplifyLibCalls => InstCombine}/debug-line.ll (95%) rename test/Transforms/{SimplifyLibCalls/double-float-shrink.ll => InstCombine/double-float-shrink-1.ll} (54%) create mode 100644 test/Transforms/InstCombine/double-float-shrink-2.ll create mode 100644 test/Transforms/InstCombine/exp2-1.ll create mode 100644 test/Transforms/InstCombine/exp2-2.ll create mode 100644 test/Transforms/InstCombine/fast-math.ll create mode 100644 test/Transforms/InstCombine/ffs-1.ll create mode 100644 test/Transforms/InstCombine/fmul.ll create mode 100644 test/Transforms/InstCombine/fold-phi.ll create mode 100644 test/Transforms/InstCombine/fprintf-1.ll create mode 100644 test/Transforms/InstCombine/fputs-1.ll create mode 100644 test/Transforms/InstCombine/fwrite-1.ll create mode 100644 test/Transforms/InstCombine/isascii-1.ll create mode 100644 test/Transforms/InstCombine/isdigit-1.ll rename test/Transforms/{SimplifyLibCalls => InstCombine}/osx-names.ll (96%) create mode 100644 test/Transforms/InstCombine/pow-1.ll create mode 100644 test/Transforms/InstCombine/pow-2.ll create mode 100644 test/Transforms/InstCombine/printf-1.ll create mode 100644 test/Transforms/InstCombine/printf-2.ll create mode 100644 test/Transforms/InstCombine/puts-1.ll create mode 100644 test/Transforms/InstCombine/sprintf-1.ll create mode 100644 test/Transforms/InstCombine/toascii-1.ll create mode 100644 test/Transforms/InstCombine/vector-type.ll create mode 100644 test/Transforms/InstSimplify/call-callconv.ll create mode 100644 
test/Transforms/InstSimplify/call.ll create mode 100644 test/Transforms/InstSimplify/fast-math.ll create mode 100644 test/Transforms/InstSimplify/floating-point-arithmetic.ll create mode 100644 test/Transforms/InstSimplify/past-the-end.ll create mode 100644 test/Transforms/LoopIdiom/X86/lit.local.cfg create mode 100644 test/Transforms/LoopIdiom/X86/popcnt.ll create mode 100644 test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll create mode 100644 test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll rename test/Transforms/LoopStrengthReduce/{ => X86}/2008-08-14-ShadowIV.ll (96%) rename test/Transforms/LoopStrengthReduce/{ => X86}/2011-07-20-DoubleIV.ll (92%) create mode 100644 test/Transforms/LoopVectorize/12-12-11-if-conv.ll create mode 100644 test/Transforms/LoopVectorize/ARM/arm-unroll.ll create mode 100644 test/Transforms/LoopVectorize/ARM/gcc-examples.ll create mode 100644 test/Transforms/LoopVectorize/ARM/lit.local.cfg create mode 100644 test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll create mode 100644 test/Transforms/LoopVectorize/ARM/width-detect.ll create mode 100644 test/Transforms/LoopVectorize/X86/constant-vector-operand.ll create mode 100644 test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll create mode 100644 test/Transforms/LoopVectorize/X86/no-vector.ll create mode 100644 test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll create mode 100644 test/Transforms/LoopVectorize/X86/parallel-loops.ll create mode 100644 test/Transforms/LoopVectorize/X86/reduction-crash.ll create mode 100644 test/Transforms/LoopVectorize/X86/small-size.ll create mode 100644 test/Transforms/LoopVectorize/X86/struct-store.ll create mode 100644 test/Transforms/LoopVectorize/X86/unroll-small-loops.ll create mode 100644 test/Transforms/LoopVectorize/X86/unroll_selection.ll create mode 100644 test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll create mode 100644 test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll create mode 
100644 test/Transforms/LoopVectorize/bzip_reverse_loops.ll create mode 100644 test/Transforms/LoopVectorize/calloc.ll create mode 100644 test/Transforms/LoopVectorize/cast-induction.ll create mode 100644 test/Transforms/LoopVectorize/dbg.value.ll create mode 100644 test/Transforms/LoopVectorize/float-reduction.ll create mode 100644 test/Transforms/LoopVectorize/global_alias.ll create mode 100644 test/Transforms/LoopVectorize/i8-induction.ll create mode 100644 test/Transforms/LoopVectorize/if-conv-crash.ll create mode 100644 test/Transforms/LoopVectorize/if-conversion-reduction.ll create mode 100644 test/Transforms/LoopVectorize/if-conversion.ll create mode 100644 test/Transforms/LoopVectorize/intrinsic.ll create mode 100644 test/Transforms/LoopVectorize/lcssa-crash.ll create mode 100644 test/Transforms/LoopVectorize/no_int_induction.ll create mode 100644 test/Transforms/LoopVectorize/nofloat.ll create mode 100644 test/Transforms/LoopVectorize/nsw-crash.ll create mode 100644 test/Transforms/LoopVectorize/phi-hang.ll create mode 100644 test/Transforms/LoopVectorize/ptr_loops.ll create mode 100644 test/Transforms/LoopVectorize/same-base-access.ll create mode 100644 test/Transforms/LoopVectorize/simple-unroll.ll create mode 100644 test/Transforms/LoopVectorize/struct_access.ll create mode 100644 test/Transforms/LoopVectorize/vectorize-once.ll create mode 100644 test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll create mode 100644 test/Transforms/ObjCARC/arc-annotations.ll create mode 100644 test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll create mode 100644 test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll create mode 100644 test/Transforms/ObjCARC/intrinsic-use.ll create mode 100644 test/Transforms/ObjCARC/retain-block-escape-analysis.ll create mode 100644 test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll create mode 100644 test/Transforms/Reassociate/xor_reassoc.ll create mode 100644 
test/Transforms/Reg2Mem/crash.ll create mode 100644 test/Transforms/Reg2Mem/lit.local.cfg create mode 100644 test/Transforms/SROA/vectors-of-pointers.ll rename test/Transforms/SimplifyCFG/{2002-05-05-EmptyBlockMerge.ll => EmptyBlockMerge.ll} (86%) rename test/Transforms/SimplifyCFG/{2002-06-24-PHINode.ll => PHINode.ll} (77%) create mode 100644 test/Transforms/SimplifyCFG/trivial-throw.ll create mode 100644 test/Transforms/SimplifyCFG/volatile-phioper.ll delete mode 100644 test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll delete mode 100644 test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll delete mode 100644 test/Transforms/SimplifyLibCalls/FFS.ll delete mode 100644 test/Transforms/SimplifyLibCalls/FPrintF.ll delete mode 100644 test/Transforms/SimplifyLibCalls/FPuts.ll delete mode 100644 test/Transforms/SimplifyLibCalls/IsDigit.ll delete mode 100644 test/Transforms/SimplifyLibCalls/Printf.ll delete mode 100644 test/Transforms/SimplifyLibCalls/Puts.ll delete mode 100644 test/Transforms/SimplifyLibCalls/SPrintF.ll delete mode 100644 test/Transforms/SimplifyLibCalls/ToAscii.ll delete mode 100644 test/Transforms/SimplifyLibCalls/abs.ll delete mode 100644 test/Transforms/SimplifyLibCalls/cos.ll delete mode 100644 test/Transforms/SimplifyLibCalls/exp2.ll delete mode 100644 test/Transforms/SimplifyLibCalls/floor.ll delete mode 100644 test/Transforms/SimplifyLibCalls/fwrite.ll delete mode 100644 test/Transforms/SimplifyLibCalls/iprintf.ll delete mode 100644 test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll delete mode 100644 test/Transforms/SimplifyLibCalls/pow2.ll create mode 100644 test/Verifier/module-flags-1.ll create mode 100644 test/tools/llvm-lit/chain.c create mode 100644 test/tools/llvm-lit/lit.local.cfg create mode 100644 test/tools/llvm-objdump/disassembly-show-raw.s create mode 100644 test/tools/llvm-objdump/lit.local.cfg create mode 100644 test/tools/llvm-objdump/win64-unwind-data.s create mode 100644 test/tools/llvm-readobj/Inputs/trivial.ll 
create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386 create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64 create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386 create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64 create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386 create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64 create mode 100644 test/tools/llvm-readobj/file-headers.test create mode 100644 test/tools/llvm-readobj/lit.local.cfg create mode 100644 test/tools/llvm-readobj/relocations.test create mode 100644 test/tools/llvm-readobj/sections-ext.test create mode 100644 test/tools/llvm-readobj/sections.test create mode 100644 test/tools/llvm-readobj/symbols.test create mode 100644 tools/llvm-jitlistener/CMakeLists.txt create mode 100644 tools/llvm-jitlistener/LLVMBuild.txt create mode 100644 tools/llvm-jitlistener/Makefile create mode 100644 tools/llvm-jitlistener/llvm-jitlistener.cpp create mode 100644 tools/llvm-objdump/COFFDump.cpp create mode 100644 tools/llvm-objdump/ELFDump.cpp create mode 100644 tools/llvm-readobj/COFFDumper.cpp create mode 100644 tools/llvm-readobj/ELFDumper.cpp create mode 100644 tools/llvm-readobj/Error.cpp create mode 100644 tools/llvm-readobj/Error.h create mode 100644 tools/llvm-readobj/MachODumper.cpp create mode 100644 tools/llvm-readobj/ObjDumper.cpp create mode 100644 tools/llvm-readobj/ObjDumper.h create mode 100644 tools/llvm-readobj/StreamWriter.cpp create mode 100644 tools/llvm-readobj/StreamWriter.h create mode 100644 tools/llvm-readobj/llvm-readobj.h create mode 100644 tools/llvm-symbolizer/CMakeLists.txt create mode 100644 tools/llvm-symbolizer/LLVMSymbolize.cpp create mode 100644 tools/llvm-symbolizer/LLVMSymbolize.h create mode 100644 tools/llvm-symbolizer/Makefile create mode 100644 tools/llvm-symbolizer/llvm-symbolizer.cpp create mode 100644 tools/lto/LTODisassembler.cpp rename {utils => 
tools}/obj2yaml/CMakeLists.txt (100%) rename {utils => tools}/obj2yaml/Makefile (93%) rename {utils => tools}/obj2yaml/coff2yaml.cpp (99%) rename {utils => tools}/obj2yaml/obj2yaml.cpp (99%) rename {utils => tools}/obj2yaml/obj2yaml.h (99%) create mode 100644 unittests/ADT/MapVectorTest.cpp create mode 100644 unittests/ADT/OptionalTest.cpp create mode 100644 unittests/ADT/SparseMultiSetTest.cpp create mode 100644 unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp delete mode 100644 unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp delete mode 100644 unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h create mode 100644 unittests/IR/AttributesTest.cpp rename unittests/{VMCore => IR}/CMakeLists.txt (81%) create mode 100644 unittests/IR/ConstantsTest.cpp rename unittests/{VMCore => IR}/DominatorTreeTest.cpp (92%) rename unittests/{VMCore => IR}/IRBuilderTest.cpp (58%) rename unittests/{VMCore => IR}/InstructionsTest.cpp (90%) rename unittests/{VMCore => IR}/MDBuilderTest.cpp (97%) rename unittests/{VMCore => IR}/Makefile (83%) rename unittests/{VMCore => IR}/MetadataTest.cpp (94%) rename unittests/{VMCore => IR}/PassManagerTest.cpp (93%) rename unittests/{VMCore => IR}/TypeBuilderTest.cpp (99%) rename unittests/{VMCore => IR}/TypesTest.cpp (84%) rename unittests/{VMCore => IR}/ValueMapTest.cpp (98%) rename unittests/{VMCore => IR}/VerifierTest.cpp (85%) create mode 100644 unittests/IR/WaymarkTest.cpp create mode 100644 unittests/Option/CMakeLists.txt create mode 100644 unittests/Option/OptionParsingTest.cpp create mode 100644 unittests/Option/Opts.td create mode 100644 unittests/Support/ArrayRecyclerTest.cpp create mode 100644 unittests/Support/ErrorOrTest.cpp create mode 100644 unittests/Support/ProcessTest.cpp create mode 100644 unittests/Support/YAMLIOTest.cpp delete mode 100644 unittests/VMCore/ConstantsTest.cpp create mode 100644 utils/TableGen/CTagsEmitter.cpp delete mode 100644 utils/TableGen/EDEmitter.cpp create mode 100644 
utils/TableGen/OptParserEmitter.cpp create mode 100644 utils/TableGen/tdtags create mode 100644 utils/lit/MANIFEST.in create mode 100644 utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/data.txt delete mode 100644 utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp create mode 100644 utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/pct-S.ll delete mode 100644 utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp delete mode 100644 utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp delete mode 100644 utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp create mode 100644 utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg delete mode 100644 utils/lit/lit/ExampleTests/TclTest/lit.local.cfg delete mode 100644 utils/lit/lit/ExampleTests/TclTest/stderr-pipe.ll delete mode 100644 utils/lit/lit/ExampleTests/TclTest/tcl-redir-1.ll delete mode 100644 utils/lit/lit/LitFormats.py delete mode 100644 utils/lit/lit/TclUtil.py create mode 100644 utils/lit/lit/discovery.py create mode 100644 utils/lit/tests/.coveragerc create mode 100644 utils/lit/tests/Inputs/discovery/lit.cfg create mode 100644 utils/lit/tests/Inputs/discovery/subdir/lit.local.cfg create mode 100644 utils/lit/tests/Inputs/discovery/subdir/test-three.py create mode 100644 utils/lit/tests/Inputs/discovery/subsuite/lit.cfg create mode 100644 utils/lit/tests/Inputs/discovery/subsuite/test-one.txt create mode 100644 utils/lit/tests/Inputs/discovery/subsuite/test-two.txt create mode 100644 utils/lit/tests/Inputs/discovery/test-one.txt create mode 100644 utils/lit/tests/Inputs/discovery/test-two.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/external_shell/fail.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/external_shell/lit.local.cfg create mode 100644 utils/lit/tests/Inputs/shtest-format/external_shell/pass.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/fail.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/lit.cfg create mode 100644 
utils/lit/tests/Inputs/shtest-format/no-test-line.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/pass.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/requires-missing.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/requires-present.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/unsupported_dir/lit.local.cfg create mode 100644 utils/lit/tests/Inputs/shtest-format/unsupported_dir/some-test.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/xfail-feature.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/xfail-target.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/xfail.txt create mode 100644 utils/lit/tests/Inputs/shtest-format/xpass.txt create mode 100644 utils/lit/tests/Inputs/shtest-shell/error-0.txt create mode 100644 utils/lit/tests/Inputs/shtest-shell/error-1.txt create mode 100644 utils/lit/tests/Inputs/shtest-shell/error-2.txt create mode 100644 utils/lit/tests/Inputs/shtest-shell/lit.cfg create mode 100644 utils/lit/tests/Inputs/shtest-shell/redirects.txt create mode 100644 utils/lit/tests/Inputs/shtest-shell/sequencing-0.txt create mode 100644 utils/lit/tests/Inputs/shtest-shell/sequencing-1.txt create mode 100755 utils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh create mode 100755 utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh create mode 100644 utils/lit/tests/Inputs/unittest-adaptor/lit.cfg create mode 100644 utils/lit/tests/Inputs/unittest-adaptor/test-one.txt create mode 100644 utils/lit/tests/Inputs/unittest-adaptor/test-two.txt create mode 100644 utils/lit/tests/discovery.py create mode 100644 utils/lit/tests/lit.cfg create mode 100644 utils/lit/tests/shell-parsing.py create mode 100644 utils/lit/tests/shtest-format.py create mode 100644 utils/lit/tests/shtest-shell.py create mode 100644 utils/lit/tests/unittest-adaptor.py create mode 100644 utils/lit/tests/usage.py create mode 100644 utils/lit/utils/README.txt create mode 100755 
utils/lit/utils/check-coverage create mode 100755 utils/lit/utils/check-sdist create mode 100644 utils/llvm.natvis create mode 100755 utils/sort_includes.py create mode 100644 utils/testgen/mc-bundling-x86-gen.py create mode 100644 utils/textmate/README create mode 100644 utils/textmate/TableGen.tmbundle/Syntaxes/TableGen.tmLanguage create mode 100644 utils/textmate/TableGen.tmbundle/info.plist create mode 100644 utils/unittest/googletest/gtest-all.cc create mode 100755 utils/wciia.py diff --git a/.arcconfig b/.arcconfig new file mode 100644 index 000000000000..4711195a1d01 --- /dev/null +++ b/.arcconfig @@ -0,0 +1,4 @@ +{ + "project_id" : "llvm", + "conduit_uri" : "http://llvm-reviews.chandlerc.com/" +} diff --git a/CMakeLists.txt b/CMakeLists.txt index d3edc0219858..6871e654fb1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ set(CMAKE_MODULE_PATH ) set(LLVM_VERSION_MAJOR 3) -set(LLVM_VERSION_MINOR 2) +set(LLVM_VERSION_MINOR 3) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}svn") @@ -74,8 +74,8 @@ set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) set(LLVM_ALL_TARGETS + AArch64 ARM - CellSPU CppBackend Hexagon Mips @@ -186,13 +186,16 @@ endif( LLVM_USE_INTEL_JITEVENTS ) option(LLVM_USE_OPROFILE "Use opagent JIT interface to inform OProfile about JIT code" OFF) -# If enabled, ierify we are on a platform that supports oprofile. +# If enabled, verify we are on a platform that supports oprofile. if( LLVM_USE_OPROFILE ) if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) message(FATAL_ERROR "OProfile support is available on Linux only.") endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) endif( LLVM_USE_OPROFILE ) +set(LLVM_USE_SANITIZER "" CACHE STRING + "Define the sanitizer used to build binaries and tests.") + # Define an option controlling whether we should build for 32-bit on 64-bit # platforms, where supported. 
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) @@ -243,8 +246,7 @@ include(config-ix) # invocation time. set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}" CACHE STRING "Default target for which LLVM will generate code." ) -set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" CACHE STRING - "Default target for which LLVM will generate code." ) +set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}") include(HandleLLVMOptions) @@ -377,10 +379,21 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR}) +if( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD ) + # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM + # with libxml2, iconv.h, etc., we must add /usr/local paths. + include_directories("/usr/local/include") + link_directories("/usr/local/lib") +endif( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD ) + if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include llvm/Support/Solaris.h") endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) +# Make sure we don't get -rdynamic in every binary. For those that need it, +# use set_target_properties(target PROPERTIES ENABLE_EXPORTS 1) +set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") + include(AddLLVM) include(TableGen) @@ -406,7 +419,6 @@ add_subdirectory(utils/count) add_subdirectory(utils/not) add_subdirectory(utils/llvm-lit) add_subdirectory(utils/yaml-bench) -add_subdirectory(utils/obj2yaml) add_subdirectory(utils/yaml2obj) add_subdirectory(projects) @@ -428,7 +440,7 @@ if( LLVM_INCLUDE_TESTS ) add_subdirectory(utils/unittest) add_subdirectory(unittests) if (MSVC) - # This utility is used to prevent chrashing tests from calling Dr. Watson on + # This utility is used to prevent crashing tests from calling Dr. Watson on # Windows. add_subdirectory(utils/KillTheDoctor) endif() diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT index fd7bcda3b768..10bf071801fd 100644 --- a/CODE_OWNERS.TXT +++ b/CODE_OWNERS.TXT @@ -8,9 +8,30 @@ beautification by scripts. 
The fields are: name (N), email (E), web-address (W), PGP key ID and fingerprint (P), description (D), and snail-mail address (S). +N: Joe Abbey +E: jabbey@arxan.com +D: LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*) + +N: Owen Anderson +E: resistor@mac.com +D: SelectionDAG (lib/CodeGen/SelectionDAG/*) + +N: Rafael Avila de Espindola +E: rafael.espindola@gmail.com +D: Gold plugin (tools/gold/*) + +N: Chandler Carruth +E: chandlerc@gmail.com +E: chandlerc@google.com +D: Config, ADT, Support, inlining & related passse, SROA/mem2reg & related passes, CMake, library layering + N: Evan Cheng E: evan.cheng@apple.com -D: Code generator and all targets +D: ARM target, parts of code generator not covered by someone else + +N: Eric Christopher +E: echristo@gmail.com +D: Debug Information, autotools/configure/make build, inline assembly N: Greg Clayton D: LLDB @@ -18,34 +39,93 @@ D: LLDB N: Peter Collingbourne D: libclc -N: Doug Gregor -D: Clang Frontend Libraries +N: Anshuman Dasgupta +E: adasgupt@codeaurora.org +D: Hexagon Backend + +N: Hal Finkel +E: hfinkel@anl.gov +D: BBVectorize and the PowerPC target + +N: Venkatraman Govindaraju +E: venkatra@cs.wisc.edu +D: Sparc Backend (lib/Target/Sparc/*) N: Tobias Grosser D: Polly +N: James Grosbach +E: grosbach@apple.com +D: MC layer + N: Howard Hinnant D: libc++ -N: Anton Korobeynikov -E: asl@math.spbu.ru -D: Exception handling, debug information, and Windows codegen +N: Justin Holewinski +E: jholewinski@nvidia.com +D: NVPTX Target (lib/Target/NVPTX/*) -N: Ted Kremenek -D: Clang Static Analyzer +N: Andy Kaylor +E: andrew.kaylor@intel.com +D: MCJIT, RuntimeDyld and JIT event listeners + +N: Galina Kistanova +E: gkistanova@gmail.com +D: LLVM Buildbot + +N: Anton Korobeynikov +E: anton@korobeynikov.info +D: Exception handling, Windows codegen, ARM EABI + +N: Benjamin Kramer +E: benny.kra@gmail.com +D: DWARF Parser + +N: Sergei Larin +E: slarin@codeaurora.org +D: VLIW Instruction Scheduling, Packetization N: Chris Lattner E: 
sabre@nondot.org W: http://nondot.org/~sabre/ D: Everything not covered by someone else -N: John McCall -E: rjmccall@apple.com -D: Clang LLVM IR generation +N: Tim Northover +E: Tim.Northover@arm.com +D: AArch64 backend N: Jakob Olesen D: Register allocators and TableGen +N: Richard Osborne +E: richard@xmos.com +D: XCore Backend + +N: Chad Rosier +E: mcrosier@apple.com +D: Fast-Isel + +N: Nadav Rotem +E: nrotem@apple.com +D: X86 Backend, Loop Vectorizer + N: Duncan Sands E: baldrick@free.fr D: DragonEgg + +N: Michael Spencer +E: bigcheesegs@gmail.com +D: Windows parts of Support, Object, ar, nm, objdump, ranlib, size + +N: Tom Stellard +E: thomas.stellard@amd.com +E: mesa-dev@lists.freedesktop.org +D: R600 Backend + +N: Andrew Trick +E: atrick@apple.com +D: IndVar Simplify, Loop Strength Reduction, Instruction Scheduling + +N: Bill Wendling +E: wendling@apple.com +D: libLTO & IR Linker diff --git a/CREDITS.TXT b/CREDITS.TXT index 02579182589a..e89f19e79446 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -60,9 +60,11 @@ D: Loop unrolling with run-time trip counts. N: Chandler Carruth E: chandlerc@gmail.com +E: chandlerc@google.com D: Hashing algorithms and interfaces D: Inline cost analysis D: Machine block placement pass +D: SROA N: Casey Carter E: ccarter@uiuc.edu @@ -98,7 +100,7 @@ E: adasgupt@codeaurora.org D: Deterministic finite automaton based infrastructure for VLIW packetization N: Stefanus Du Toit -E: stefanus.dutoit@rapidmind.com +E: stefanus.du.toit@intel.com D: Bug fixes and minor improvements N: Rafael Avila de Espindola @@ -141,7 +143,7 @@ E: foldr@codedgers.com D: Author of llvmc2 N: Dan Gohman -E: gohman@apple.com +E: dan433584@gmail.com D: Miscellaneous bug fixes N: David Goodwin @@ -361,8 +363,8 @@ D: ARM fast-isel improvements D: Performance monitoring N: Nadav Rotem -E: nadav.rotem@intel.com -D: Vector code generation improvements. +E: nrotem@apple.com +D: X86 code generation improvements, Loop Vectorizer. 
N: Roman Samoilov E: roman@codedgers.com @@ -402,6 +404,10 @@ E: rspencer@reidspencer.com W: http://reidspencer.com/ D: Lots of stuff, see: http://wiki.llvm.org/index.php/User:Reid +N: Craig Topper +E: craig.topper@gmail.com +D: X86 codegen and disassembler improvements. AVX2 support. + N: Edwin Torok E: edwintorok@gmail.com D: Miscellaneous bug fixes @@ -417,7 +423,6 @@ D: Thread Local Storage implementation N: Bill Wendling E: wendling@apple.com -D: Exception handling D: Bunches of stuff N: Bob Wilson diff --git a/LICENSE.TXT b/LICENSE.TXT index 00cf60116941..aa7b11922ec0 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -4,7 +4,7 @@ LLVM Release License University of Illinois/NCSA Open Source License -Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign. +Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign. All rights reserved. Developed by: @@ -64,7 +64,7 @@ Program Directory Autoconf llvm/autoconf llvm/projects/ModuleMaker/autoconf llvm/projects/sample/autoconf -CellSPU backend llvm/lib/Target/CellSPU/README.txt Google Test llvm/utils/unittest/googletest OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} +ARM contributions llvm/lib/Target/ARM/LICENSE.TXT diff --git a/Makefile b/Makefile index 1e5dae470d26..7a1b19045945 100644 --- a/Makefile +++ b/Makefile @@ -11,8 +11,8 @@ LEVEL := . # Top-Level LLVM Build Stages: # 1. Build lib/Support and lib/TableGen, which are used by utils (tblgen). -# 2. Build utils, which is used by VMCore. -# 3. Build VMCore, which builds the Intrinsics.inc file used by libs. +# 2. Build utils, which is used by IR. +# 3. Build IR, which builds the Intrinsics.inc file used by libs. # 4. Build libs, which are needed by llvm-config. # 5. Build llvm-config, which determines inter-lib dependencies for tools. # 6. Build tools, runtime, docs. 
@@ -30,7 +30,7 @@ ifeq ($(BUILD_DIRS_ONLY),1) DIRS := lib/Support lib/TableGen utils tools/llvm-config OPTIONAL_DIRS := tools/clang/utils/TableGen else - DIRS := lib/Support lib/TableGen utils lib/VMCore lib tools/llvm-shlib \ + DIRS := lib/Support lib/TableGen utils lib/IR lib tools/llvm-shlib \ tools/llvm-config tools runtime docs unittests OPTIONAL_DIRS := projects bindings endif @@ -248,13 +248,26 @@ build-for-llvm-top: SVN = svn SVN-UPDATE-OPTIONS = AWK = awk -SUB-SVN-DIRS = $(AWK) '/I|\? / {print $$2}' \ - | LC_ALL=C xargs $(SVN) info 2>/dev/null \ - | $(AWK) '/^Path:\ / {print $$2}' + +# Multiline variable defining a recursive function for finding svn repos rooted at +# a given path. svnup() requires one argument: the root to search from. +define SUB_SVN_DIRS +svnup() { + dirs=`svn status --no-ignore $$1 | awk '/I|\? / {print $$2}' | LC_ALL=C xargs svn info 2>/dev/null | awk '/^Path:\ / {print $$2}'`; + if [ "$$dirs" = "" ]; then + return; + fi; + for f in $$dirs; do + echo $$f; + svnup $$f; + done +} +endef +export SUB_SVN_DIRS update: $(SVN) $(SVN-UPDATE-OPTIONS) update $(LLVM_SRC_ROOT) - @ $(SVN) status --no-ignore $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update + @eval $$SUB_SVN_DIRS; $(SVN) status --no-ignore $(LLVM_SRC_ROOT) | svnup $(LLVM_SRC_ROOT) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update happiness: update all check-all diff --git a/Makefile.common b/Makefile.common index 55e2b63434c8..a157abaef274 100644 --- a/Makefile.common +++ b/Makefile.common @@ -28,7 +28,7 @@ # built in any order. All DIRS are built in order before PARALLEL_DIRS are # built, which are then built in any order. # -# 4. Source - If specified, this sets the source code filenames. If this +# 4. SOURCES - If specified, this sets the source code filenames. If this # is not set, it defaults to be all of the .cpp, .c, .y, and .l files # in the current directory. 
# diff --git a/Makefile.config.in b/Makefile.config.in index b4ecea631e3c..26e3709fee00 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -134,6 +134,9 @@ BUILD_CXX=@BUILD_CXX@ # Triple for configuring build tools when cross-compiling BUILD_TRIPLE=@build@ +# Target triple (cpu-vendor-os) which LLVM is compiled for +HOST_TRIPLE=@host@ + # Target triple (cpu-vendor-os) for which we should generate code TARGET_TRIPLE=@target@ @@ -153,8 +156,17 @@ CXX = @CXX@ # Path to the CC binary, which use used by testcases for native builds. CC := @CC@ +# C/C++ preprocessor flags. +CPPFLAGS += @CPPFLAGS@ + +# C compiler flags. +CFLAGS += @CFLAGS@ + +# C++ compiler flags. +CXXFLAGS += @CXXFLAGS@ + # Linker flags. -LDFLAGS+=@LDFLAGS@ +LDFLAGS += @LDFLAGS@ # Path to the library archiver program. AR_PATH = @AR@ @@ -176,6 +188,7 @@ RANLIB := @RANLIB@ RM := @RM@ SED := @SED@ TAR := @TAR@ +PYTHON := @PYTHON@ # Paths to miscellaneous programs we hope are present but might not be BZIP2 := @BZIP2@ @@ -222,6 +235,15 @@ ENABLE_LIBCPP = @ENABLE_LIBCPP@ # When ENABLE_CXX11 is enabled, LLVM uses c++11 mode by default to build. ENABLE_CXX11 = @ENABLE_CXX11@ +# When ENABLE_CLANG_ARCMT is enabled, clang will have ARCMigrationTool. +ENABLE_CLANG_ARCMT = @ENABLE_CLANG_ARCMT@ + +# When ENABLE_CLANG_REWRITER is enabled, clang will have Rewriter. +ENABLE_CLANG_REWRITER = @ENABLE_CLANG_REWRITER@ + +# When ENABLE_CLANG_STATIC_ANALYZER is enabled, clang will have StaticAnalyzer. +ENABLE_CLANG_STATIC_ANALYZER = @ENABLE_CLANG_STATIC_ANALYZER@ + # When ENABLE_WERROR is enabled, we'll pass -Werror on the command line ENABLE_WERROR = @ENABLE_WERROR@ @@ -278,7 +300,7 @@ ENABLE_DOCS = @ENABLE_DOCS@ ENABLE_DOXYGEN = @ENABLE_DOXYGEN@ # Do we want to enable threads? -ENABLE_THREADS := @ENABLE_THREADS@ +ENABLE_THREADS := @LLVM_ENABLE_THREADS@ # Do we want to build with position independent code? 
ENABLE_PIC := @ENABLE_PIC@ @@ -349,6 +371,10 @@ NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@ NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@ # -Wcovered-switch-default COVERED_SWITCH_DEFAULT = @COVERED_SWITCH_DEFAULT@ +# -Wno-uninitialized +NO_UNINITIALIZED = @NO_UNINITIALIZED@ +# -Wno-maybe-uninitialized +NO_MAYBE_UNINITIALIZED = @NO_MAYBE_UNINITIALIZED@ # Was polly found in tools/polly? LLVM_HAS_POLLY = @LLVM_HAS_POLLY@ diff --git a/Makefile.rules b/Makefile.rules index b2b02c25d44b..2c834aac63fc 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -97,7 +97,7 @@ endif $(LLVMBuildMakeFrag): $(PROJ_SRC_ROOT)/Makefile.rules \ $(PROJ_OBJ_ROOT)/Makefile.config $(Echo) Constructing LLVMBuild project information. - $(Verb) $(LLVMBuildTool) \ + $(Verb)$(PYTHON) $(LLVMBuildTool) \ --native-target "$(TARGET_NATIVE_ARCH)" \ --enable-targets "$(TARGETS_TO_BUILD)" \ --enable-optional-components "$(OPTIONAL_COMPONENTS)" \ @@ -280,12 +280,6 @@ ifeq ($(ENABLE_OPTIMIZED),1) endif endif - # Darwin requires -fstrict-aliasing to be explicitly enabled. 
- # Avoid -fstrict-aliasing on Darwin for now, there are unresolved issues - # with -fstrict-aliasing and ipa-type-escape radr://6756684 - #ifeq ($(HOST_OS),Darwin) - # EXTRA_OPTIONS += -fstrict-aliasing -Wstrict-aliasing - #endif CXX.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer) C.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer) LD.Flags += $(OPTIMIZE_OPTION) @@ -583,16 +577,24 @@ ifeq ($(HOST_OS),Darwin) LoadableModuleOptions := -Wl,-flat_namespace -Wl,-undefined,suppress SharedLinkOptions := -dynamiclib - ifneq ($(ARCH),ARM) - SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION) + ifdef DEPLOYMENT_TARGET + SharedLinkOptions += $(DEPLOYMENT_TARGET) + else + ifneq ($(ARCH),ARM) + SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION) + endif endif else SharedLinkOptions=-shared endif ifeq ($(TARGET_OS),Darwin) - ifneq ($(ARCH),ARM) - TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION) + ifdef DEPLOYMENT_TARGET + TargetCommonOpts += $(DEPLOYMENT_TARGET) + else + ifneq ($(ARCH),ARM) + TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION) + endif endif endif @@ -648,7 +650,7 @@ else ifneq ($(DARWIN_MAJVERS),4) LD.Flags += $(RPATH) -Wl,@executable_path/../lib endif - ifeq ($(RC_BUILDIT),YES) + ifeq ($(RC_XBS),YES) TempFile := $(shell mkdir -p ${OBJROOT}/dSYMs ; mktemp ${OBJROOT}/dSYMs/llvm-lto.XXXXXX) LD.Flags += -Wl,-object_path_lto -Wl,$(TempFile) endif @@ -668,7 +670,9 @@ ifndef NO_PEDANTIC CompileCommonOpts += -pedantic -Wno-long-long endif CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \ - $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT) + $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT) \ + $(NO_UNINITIALIZED) $(NO_MAYBE_UNINITIALIZED) \ + $(NO_MISSING_FIELD_INITIALIZERS) # Enable cast-qual for C++; the workaround is to use const_cast. 
CXX.Flags += -Wcast-qual @@ -824,7 +828,7 @@ ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc) #---------------------------------------------------------- ifeq (-mingw32,$(findstring -mingw32,$(BUILD_TRIPLE))) - ECHOPATH := $(Verb)python -u -c "import sys;print ' '.join(sys.argv[1:])" + ECHOPATH := $(Verb)$(PYTHON) -u -c "import sys;print ' '.join(sys.argv[1:])" else ECHOPATH := $(Verb)$(ECHO) endif @@ -1814,7 +1818,7 @@ TDFiles := $(strip $(wildcard $(PROJ_SRC_DIR)/*.td) \ $(LLVM_SRC_ROOT)/include/llvm/Target/TargetSchedule.td \ $(LLVM_SRC_ROOT)/include/llvm/Target/TargetSelectionDAG.td \ $(LLVM_SRC_ROOT)/include/llvm/CodeGen/ValueTypes.td) \ - $(wildcard $(LLVM_SRC_ROOT)/include/llvm/Intrinsics*.td) + $(wildcard $(LLVM_SRC_ROOT)/include/llvm/IR/Intrinsics*.td) # All .inc.tmp files depend on the .td files. $(INCTMPFiles) : $(TDFiles) @@ -1869,11 +1873,6 @@ $(ObjDir)/%GenDisassemblerTables.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN) $(Echo) "Building $( +#if __has_include () +#include +#endif +#if __has_include () +#include +#endif +]])], +[ + AC_MSG_RESULT([yes]) +], +[ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([Selected compiler could not find or parse C++ standard library headers. Rerun with CC=c-compiler CXX=c++-compiler ./configure ...]) +]) + AC_LANG_POP([C++]) +fi + dnl Configure all of the projects present in our source tree. While we could dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a dnl configure script, that usage of the AC_CONFIG_SUBDIRS macro is deprecated. 
@@ -363,6 +394,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm*-*) llvm_cv_target_arch="ARM" ;; + aarch64*-*) llvm_cv_target_arch="AArch64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -396,6 +428,7 @@ case $host in sparc*-*) host_arch="Sparc" ;; powerpc*-*) host_arch="PowerPC" ;; arm*-*) host_arch="ARM" ;; + aarch64*-*) host_arch="AArch64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -475,6 +508,54 @@ case "$enableval" in *) AC_MSG_ERROR([Invalid setting for --enable-cxx11. Use "yes" or "no"]) ;; esac +dnl --enable-clang-arcmt: check whether to enable clang arcmt +clang_arcmt="yes" +AC_ARG_ENABLE(clang-arcmt, + AS_HELP_STRING([--enable-clang-arcmt], + [Enable building of clang ARCMT (default is YES)]), + clang_arcmt="$enableval", + enableval="yes") +case "$enableval" in + yes) AC_SUBST(ENABLE_CLANG_ARCMT,[1]) ;; + no) AC_SUBST(ENABLE_CLANG_ARCMT,[0]) ;; + default) AC_SUBST(ENABLE_CLANG_ARCMT,[1]);; + *) AC_MSG_ERROR([Invalid setting for --enable-clang-arcmt. Use "yes" or "no"]) ;; +esac + +dnl --enable-clang-static-analyzer: check whether to enable static-analyzer +clang_static_analyzer="yes" +AC_ARG_ENABLE(clang-static-analyzer, + AS_HELP_STRING([--enable-clang-static-analyzer], + [Enable building of clang Static Analyzer (default is YES)]), + clang_static_analyzer="$enableval", + enableval="yes") +case "$enableval" in + yes) AC_SUBST(ENABLE_CLANG_STATIC_ANALYZER,[1]) ;; + no) AC_SUBST(ENABLE_CLANG_STATIC_ANALYZER,[0]) ;; + default) AC_SUBST(ENABLE_CLANG_STATIC_ANALYZER,[1]);; + *) AC_MSG_ERROR([Invalid setting for --enable-clang-static-analyzer. 
Use "yes" or "no"]) ;; +esac + +dnl --enable-clang-rewriter: check whether to enable clang rewriter +AC_ARG_ENABLE(clang-rewriter, + AS_HELP_STRING([--enable-clang-rewriter], + [Enable building of clang rewriter (default is YES)]),, + enableval="yes") +case "$enableval" in + yes) AC_SUBST(ENABLE_CLANG_REWRITER,[1]) ;; + no) + if test ${clang_arcmt} != "no" ; then + AC_MSG_ERROR([Cannot enable clang ARC Migration Tool while disabling rewriter.]) + fi + if test ${clang_static_analyzer} != "no" ; then + AC_MSG_ERROR([Cannot enable clang static analyzer while disabling rewriter.]) + fi + AC_SUBST(ENABLE_CLANG_REWRITER,[0]) + ;; + default) AC_SUBST(ENABLE_CLANG_REWRITER,[1]);; + *) AC_MSG_ERROR([Invalid setting for --enable-clang-rewriter. Use "yes" or "no"]) ;; +esac + dnl --enable-optimized : check whether they want to do an optimized build: AC_ARG_ENABLE(optimized, AS_HELP_STRING( --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize) @@ -566,6 +647,7 @@ else PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;; x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;; ARM) AC_SUBST(TARGET_HAS_JIT,1) ;; + AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;; Mips) AC_SUBST(TARGET_HAS_JIT,1) ;; XCore) AC_SUBST(TARGET_HAS_JIT,0) ;; MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;; @@ -697,26 +779,26 @@ dnl Allow specific targets to be specified for building (or not) TARGETS_TO_BUILD="" AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets], [Build specific host targets: all or target1,target2,... 
Valid targets are: - host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, + host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon, xcore, msp430, nvptx, and cpp (default=all)]),, enableval=all) if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; - spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; @@ -731,7 +813,6 @@ case "$enableval" in ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; @@ -1165,10 +1246,15 @@ fi dnl Verify that GCC is version 3.0 or higher if test "$GCC" = "yes" then - AC_COMPILE_IFELSE([[#if !defined(__GNUC__) || __GNUC__ < 3 -#error Unsupported GCC version -#endif -]], [], [AC_MSG_ERROR([gcc 3.x 
required, but you have a lower version])]) + AC_COMPILE_IFELSE( +[ + AC_LANG_SOURCE([[ + #if !defined(__GNUC__) || __GNUC__ < 3 + #error Unsupported GCC version + #endif + ]]) +], +[], [AC_MSG_ERROR([gcc 3.x required, but you have a lower version])]) fi dnl Check for GNU Make. We use its extensions, so don't build without it @@ -1185,7 +1271,53 @@ AC_MSG_CHECKING([optional compiler flags]) CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros]) CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers]) CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default]) -AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT]) + +dnl GCC's potential uninitialized use analysis is weak and presents lots of +dnl false positives, so disable it. +NO_UNINITIALIZED= +NO_MAYBE_UNINITIALIZED= +if test "$GXX" = "yes" +then + CXX_FLAG_CHECK(NO_MAYBE_UNINITIALIZED, [-Wno-maybe-uninitialized]) + dnl gcc 4.7 introduced -Wmaybe-uninitialized to distinguish cases which are + dnl known to be uninitialized from cases which might be uninitialized. We + dnl still want to catch the first kind of errors. 
+ if test -z "$NO_MAYBE_UNINITIALIZED" + then + CXX_FLAG_CHECK(NO_UNINITIALIZED, [-Wno-uninitialized]) + fi +fi +AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED]) + +AC_ARG_WITH([python], + [AS_HELP_STRING([--with-python], [path to python])], + [PYTHON="$withval"]) + +if test -n "$PYTHON" && test -x "$PYTHON" ; then + AC_MSG_CHECKING([for python]) + AC_MSG_RESULT([user defined: $with_python]) +else + if test -n "$PYTHON" ; then + AC_MSG_WARN([specified python ($PYTHON) is not usable, searching path]) + fi + + AC_PATH_PROG([PYTHON], [python python2 python26], + [AC_MSG_RESULT([not found]) + AC_MSG_ERROR([could not find python 2.5 or higher])]) +fi + +AC_MSG_CHECKING([for python >= 2.5]) +ac_python_version=`$PYTHON -c 'import sys; print sys.version.split()[[0]]'` +ac_python_version_major=`echo $ac_python_version | cut -d'.' -f1` +ac_python_version_minor=`echo $ac_python_version | cut -d'.' -f2` +ac_python_version_patch=`echo $ac_python_version | cut -d'.' -f3` +if test "$ac_python_version_major" -eq "2" \ + && test "$ac_python_version_minor" -ge "5" ; then + AC_MSG_RESULT([$PYTHON ($ac_python_version)]) +else + AC_MSG_RESULT([not found]) + AC_MSG_FAILURE([found python $ac_python_version ($PYTHON); required >= 2.5]) +fi dnl===-----------------------------------------------------------------------=== dnl=== @@ -1204,6 +1336,11 @@ AC_SEARCH_LIBS(dlopen,dl,AC_DEFINE([HAVE_DLOPEN],[1], [Define if dlopen() is available on this platform.]), AC_MSG_WARN([dlopen() not found - disabling plugin support])) +dnl Search for the clock_gettime() function. Note that we rely on the POSIX +dnl macros to detect whether clock_gettime is available, this just finds the +dnl right libraries to link with. 
+AC_SEARCH_LIBS(clock_gettime,rt) + dnl libffi is optional; used to call external functions from the interpreter if test "$llvm_cv_enable_libffi" = "yes" ; then AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1], @@ -1356,6 +1493,7 @@ AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h]) AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h]) AC_CHECK_HEADERS([valgrind/valgrind.h]) AC_CHECK_HEADERS([fenv.h]) +AC_CHECK_DECLS([FE_ALL_EXCEPT, FE_INEXACT], [], [], [[#include <fenv.h>]]) if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then AC_CHECK_HEADERS(pthread.h, AC_SUBST(HAVE_PTHREAD, 1), @@ -1375,18 +1513,23 @@ AC_CHECK_HEADERS([CrashReporterClient.h]) dnl Try to find Darwin specific crash reporting global. AC_MSG_CHECKING([__crashreporter_info__]) AC_LINK_IFELSE( - AC_LANG_SOURCE( - [[extern const char *__crashreporter_info__; - int main() { - __crashreporter_info__ = "test"; - return 0; - } - ]]), - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CRASHREPORTER_INFO, 1, Can use __crashreporter_info__), - AC_MSG_RESULT(no) - AC_DEFINE(HAVE_CRASHREPORTER_INFO, 0, - Define if __crashreporter_info__ exists.)) +[ + AC_LANG_SOURCE([[ + extern const char *__crashreporter_info__; + int main() { + __crashreporter_info__ = "test"; + return 0; + } + ]]) +], +[ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_CRASHREPORTER_INFO], [1], [can use __crashreporter_info__]) +], +[ + AC_MSG_RESULT([no]) + AC_DEFINE([HAVE_CRASHREPORTER_INFO], [0], [can use __crashreporter_info__]) +]) dnl===-----------------------------------------------------------------------=== dnl=== @@ -1412,6 +1555,7 @@ dnl===-----------------------------------------------------------------------=== AC_CHECK_FUNCS([backtrace ceilf floorf roundf rintf nearbyintf getcwd ]) AC_CHECK_FUNCS([powf fmodf strtof round ]) +AC_CHECK_FUNCS([log log2 log10 exp exp2]) AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ]) AC_CHECK_FUNCS([isatty mkdtemp mkstemp
]) AC_CHECK_FUNCS([mktemp posix_spawn pread realpath sbrk setrlimit strdup ]) @@ -1449,10 +1593,15 @@ fi dnl Check Win32 API EnumerateLoadedModules. if test "$llvm_cv_os_type" = "MingW" ; then AC_MSG_CHECKING([whether EnumerateLoadedModules() accepts new decl]) - AC_COMPILE_IFELSE([[#include <windows.h> -#include <imagehlp.h> -extern void foo(PENUMLOADED_MODULES_CALLBACK); -extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));]], + AC_COMPILE_IFELSE( +[ + AC_LANG_SOURCE([[ + #include <windows.h> + #include <imagehlp.h> + extern void foo(PENUMLOADED_MODULES_CALLBACK); + extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID)); + ]]) +], [ AC_MSG_RESULT([yes]) llvm_cv_win32_elmcb_pcstr="PCSTR" @@ -1493,22 +1642,28 @@ dnl Since we'll be using these atomic builtins in C++ files we should test dnl the C++ compiler. AC_LANG_PUSH([C++]) AC_LINK_IFELSE( - AC_LANG_SOURCE( - [[int main() { - volatile unsigned long val = 1; - __sync_synchronize(); - __sync_val_compare_and_swap(&val, 1, 0); - __sync_add_and_fetch(&val, 1); - __sync_sub_and_fetch(&val, 1); - return 0; - } - ]]), - AC_LANG_POP([C++]) - AC_MSG_RESULT(yes) - AC_DEFINE(LLVM_HAS_ATOMICS, 1, Has gcc/MSVC atomic intrinsics), - AC_MSG_RESULT(no) - AC_DEFINE(LLVM_HAS_ATOMICS, 0, Has gcc/MSVC atomic intrinsics) - AC_MSG_WARN([LLVM will be built thread-unsafe because atomic builtins are missing])) +[ + AC_LANG_SOURCE([[ + int main() { + volatile unsigned long val = 1; + __sync_synchronize(); + __sync_val_compare_and_swap(&val, 1, 0); + __sync_add_and_fetch(&val, 1); + __sync_sub_and_fetch(&val, 1); + return 0; + } + ]]) +], +[ + AC_MSG_RESULT([yes]) + AC_DEFINE([LLVM_HAS_ATOMICS], [1], [Has gcc/MSVC atomic intrinsics]) +], +[ + AC_MSG_RESULT([no]) + AC_DEFINE([LLVM_HAS_ATOMICS], [0], [Has gcc/MSVC atomic intrinsics]) + AC_MSG_WARN([LLVM will be built thread-unsafe because atomic builtins are missing]) +]) +AC_LANG_POP([C++]) dnl===-----------------------------------------------------------------------=== dnl=== diff --git a/autoconf/m4/cxx_flag_check.m4
b/autoconf/m4/cxx_flag_check.m4 index 62454b7147f9..4b0974455015 100644 --- a/autoconf/m4/cxx_flag_check.m4 +++ b/autoconf/m4/cxx_flag_check.m4 @@ -1,2 +1,2 @@ AC_DEFUN([CXX_FLAG_CHECK], - [AC_SUBST($1, `$CXX -Werror $2 -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)]) + [AC_SUBST($1, `$CXX -Werror patsubst($2, [^-Wno-], [-W]) -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)]) diff --git a/autoconf/m4/func_isinf.m4 b/autoconf/m4/func_isinf.m4 index 22ba81d54d59..40dc48b2b8a8 100644 --- a/autoconf/m4/func_isinf.m4 +++ b/autoconf/m4/func_isinf.m4 @@ -1,34 +1,40 @@ -# -# This function determins if the isinf function isavailable on this -# platform. -# +dnl +dnl This function determins if the isinf function isavailable on this +dnl platform. +dnl + AC_DEFUN([AC_FUNC_ISINF],[ + AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_math_h], [isinf], [<math.h>], [float f; isinf(f);]) if test "$ac_cv_func_isinf_in_math_h" = "yes" ; then - AC_DEFINE([HAVE_ISINF_IN_MATH_H],1,[Set to 1 if the isinf function is found in <math.h>]) + AC_DEFINE([HAVE_ISINF_IN_MATH_H], [1], + [Set to 1 if the isinf function is found in <math.h>]) fi AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_cmath], [isinf], [<cmath>], [float f; isinf(f);]) if test "$ac_cv_func_isinf_in_cmath" = "yes" ; then - AC_DEFINE([HAVE_ISINF_IN_CMATH],1,[Set to 1 if the isinf function is found in <cmath>]) + AC_DEFINE([HAVE_ISINF_IN_CMATH], [1], + [Set to 1 if the isinf function is found in <cmath>]) fi AC_SINGLE_CXX_CHECK([ac_cv_func_std_isinf_in_cmath], [std::isinf], [<cmath>], [float f; std::isinf(f);]) if test "$ac_cv_func_std_isinf_in_cmath" = "yes" ; then - AC_DEFINE([HAVE_STD_ISINF_IN_CMATH],1,[Set to 1 if the std::isinf function is found in <cmath>]) + AC_DEFINE([HAVE_STD_ISINF_IN_CMATH], [1], + [Set to 1 if the std::isinf function is found in <cmath>]) fi AC_SINGLE_CXX_CHECK([ac_cv_func_finite_in_ieeefp_h], [finite], [<ieeefp.h>], [float f; finite(f);]) if test "$ac_cv_func_finite_in_ieeefp_h" = "yes" ; then - AC_DEFINE([HAVE_FINITE_IN_IEEEFP_H],1,[Set to 1 if the finite function is found in <ieeefp.h>])
+ AC_DEFINE([HAVE_FINITE_IN_IEEEFP_H], [1], + [Set to 1 if the finite function is found in <ieeefp.h>]) fi ]) diff --git a/autoconf/m4/huge_val.m4 b/autoconf/m4/huge_val.m4 index 6c9a22eab002..d224d7cb64eb 100644 --- a/autoconf/m4/huge_val.m4 +++ b/autoconf/m4/huge_val.m4 @@ -7,12 +7,10 @@ AC_DEFUN([AC_HUGE_VAL_CHECK],[ AC_LANG_PUSH([C++]) ac_save_CXXFLAGS=$CXXFLAGS CXXFLAGS="$CXXFLAGS -pedantic" - AC_RUN_IFELSE( - AC_LANG_PROGRAM( - [#include <math.h>], - [double x = HUGE_VAL; return x != x; ]), - [ac_cv_huge_val_sanity=yes],[ac_cv_huge_val_sanity=no], - [ac_cv_huge_val_sanity=yes]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([[#include <math.h>]], + [[double x = HUGE_VAL; return x != x;]])], + [ac_cv_huge_val_sanity=yes],[ac_cv_huge_val_sanity=no], + [ac_cv_huge_val_sanity=yes]) CXXFLAGS=$ac_save_CXXFLAGS AC_LANG_POP([C++]) ]) diff --git a/autoconf/m4/single_cxx_check.m4 b/autoconf/m4/single_cxx_check.m4 index 21efa4bed353..cb4732641825 100644 --- a/autoconf/m4/single_cxx_check.m4 +++ b/autoconf/m4/single_cxx_check.m4 @@ -1,10 +1,16 @@ +dnl dnl AC_SINGLE_CXX_CHECK(CACHEVAR, FUNCTION, HEADER, PROGRAM) -dnl $1, $2, $3, $4, -dnl -AC_DEFUN([AC_SINGLE_CXX_CHECK], - [AC_CACHE_CHECK([for $2 in $3], [$1], - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([#include $3],[$4]),[$1=yes],[$1=no]) - AC_LANG_POP([C++])]) - ]) +dnl $1, $2, $3, $4, + +AC_DEFUN([AC_SINGLE_CXX_CHECK], +[ + AC_CACHE_CHECK([for $2 in $3], [$1], + [ + AC_LANG_PUSH([C++]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]][$3], [$4])], + [$1][[=yes]], + [$1][[=no]]) + AC_LANG_POP([C++]) + ]) +]) diff --git a/bindings/python/llvm/common.py b/bindings/python/llvm/common.py index 0c5fcd03d844..17c22b8ef448 100644 --- a/bindings/python/llvm/common.py +++ b/bindings/python/llvm/common.py @@ -12,10 +12,14 @@ from ctypes import c_void_p from ctypes import cdll import ctypes.util +import platform + +# LLVM_VERSION: sync with PACKAGE_VERSION in autoconf/configure.ac and CMakeLists.txt +# but leave out the 'svn' suffix.
+LLVM_VERSION = '3.3' __all__ = [ 'c_object_p', - 'find_library', 'get_library', ] @@ -87,20 +91,36 @@ class CachedProperty(object): return value -def find_library(): - # FIXME should probably have build system define absolute path of shared - # library at install time. - for lib in ['LLVM-3.1svn', 'libLLVM-3.1svn', 'LLVM', 'libLLVM']: - result = ctypes.util.find_library(lib) - if result: - return result - - return None - def get_library(): """Obtain a reference to the llvm library.""" - lib = find_library() - if not lib: - raise Exception('LLVM shared library not found!') - return cdll.LoadLibrary(lib) + # On Linux, ctypes.cdll.LoadLibrary() respects LD_LIBRARY_PATH + # while ctypes.util.find_library() doesn't. + # See http://docs.python.org/2/library/ctypes.html#finding-shared-libraries + # + # To make it possible to run the unit tests without installing the LLVM shared + # library into a default linker search path. Always Try ctypes.cdll.LoadLibrary() + # with all possible library names first, then try ctypes.util.find_library(). + + names = ['LLVM-' + LLVM_VERSION, 'LLVM-' + LLVM_VERSION + 'svn'] + t = platform.system() + if t == 'Darwin': + pfx, ext = 'lib', '.dylib' + elif t == 'Windows': + pfx, ext = '', '.dll' + else: + pfx, ext = 'lib', '.so' + + for i in names: + try: + lib = cdll.LoadLibrary(pfx + i + ext) + except OSError: + pass + else: + return lib + + for i in names: + t = ctypes.util.find_library(i) + if t: + return cdll.LoadLibrary(t) + raise Exception('LLVM shared library not found!') diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py index 5030b989a944..dcef9ac26905 100644 --- a/bindings/python/llvm/disassembler.py +++ b/bindings/python/llvm/disassembler.py @@ -31,6 +31,9 @@ __all__ = [ lib = get_library() callbacks = {} +# Constants for set_options +Option_UseMarkup = 1 + class Disassembler(LLVMObject): """Represents a disassembler instance. 
@@ -113,6 +116,10 @@ class Disassembler(LLVMObject): address += result offset += result + def set_options(self, options): + if not lib.LLVMSetDisasmOptions(self, options): + raise Exception('Unable to set all disassembler options in %i' % options) + def register_library(library): library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int, @@ -125,6 +132,10 @@ def register_library(library): c_uint64, c_uint64, c_char_p, c_size_t] library.LLVMDisasmInstruction.restype = c_size_t + library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64] + library.LLVMSetDisasmOptions.restype = c_int + + callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64, c_int, c_void_p) callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64, diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py index 545e8668b6c9..46d12f705626 100644 --- a/bindings/python/llvm/tests/test_disassembler.py +++ b/bindings/python/llvm/tests/test_disassembler.py @@ -1,6 +1,6 @@ from .base import TestBase -from ..disassembler import Disassembler +from ..disassembler import Disassembler, Option_UseMarkup class TestDisassembler(TestBase): def test_instantiate(self): @@ -26,3 +26,14 @@ class TestDisassembler(TestBase): self.assertEqual(instructions[0], (0, 3, '\tjcxz\t-127')) self.assertEqual(instructions[1], (3, 2, '\taddl\t%eax, %edi')) + + def test_set_options(self): + sequence = '\x10\x40\x2d\xe9' + triple = 'arm-linux-android' + + disassembler = Disassembler(triple) + disassembler.set_options(Option_UseMarkup) + count, s = disassembler.get_instruction(sequence) + print s + self.assertEqual(count, 4) + self.assertEqual(s, '\tpush\t{, }') diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index fcd5dd556676..7cad190c11a0 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -54,6 +54,7 @@ check_include_file(ndir.h HAVE_NDIR_H) if( NOT PURE_WINDOWS ) check_include_file(pthread.h HAVE_PTHREAD_H) 
endif() +check_include_file(sanitizer/msan_interface.h HAVE_SANITIZER_MSAN_INTERFACE_H) check_include_file(setjmp.h HAVE_SETJMP_H) check_include_file(signal.h HAVE_SIGNAL_H) check_include_file(stdint.h HAVE_STDINT_H) @@ -79,6 +80,9 @@ check_include_file(utime.h HAVE_UTIME_H) check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H) check_include_file(windows.h HAVE_WINDOWS_H) check_include_file(fenv.h HAVE_FENV_H) +check_symbol_exists(FE_ALL_EXCEPT "fenv.h" HAVE_DECL_FE_ALL_EXCEPT) +check_symbol_exists(FE_INEXACT "fenv.h" HAVE_DECL_FE_INEXACT) + check_include_file(mach/mach.h HAVE_MACH_MACH_H) check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H) @@ -99,6 +103,7 @@ if( NOT PURE_WINDOWS ) endif() endif() check_library_exists(dl dlopen "" HAVE_LIBDL) + check_library_exists(rt clock_gettime "" HAVE_LIBRT) endif() # function checks @@ -117,6 +122,12 @@ check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H) check_symbol_exists(ceilf math.h HAVE_CEILF) check_symbol_exists(floorf math.h HAVE_FLOORF) check_symbol_exists(fmodf math.h HAVE_FMODF) +check_symbol_exists(log math.h HAVE_LOG) +check_symbol_exists(log2 math.h HAVE_LOG2) +check_symbol_exists(log10 math.h HAVE_LOG10) +check_symbol_exists(exp math.h HAVE_EXP) +check_symbol_exists(exp2 math.h HAVE_EXP2) +check_symbol_exists(exp10 math.h HAVE_EXP10) if( HAVE_SETJMP_H ) check_symbol_exists(longjmp setjmp.h HAVE_LONGJMP) check_symbol_exists(setjmp setjmp.h HAVE_SETJMP) @@ -294,10 +305,33 @@ else() set(ENABLE_PIC 0) endif() +find_package(LibXml2) +if (LIBXML2_FOUND) + set(CLANG_HAVE_LIBXML 1) +endif () + include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG) +set(USE_NO_MAYBE_UNINITIALIZED 0) +set(USE_NO_UNINITIALIZED 0) + +# Disable gcc's potentially uninitialized use analysis as it presents lots of +# false positives. 
+if (CMAKE_COMPILER_IS_GNUCXX) + check_cxx_compiler_flag("-Wmaybe-uninitialized" HAS_MAYBE_UNINITIALIZED) + if (HAS_MAYBE_UNINITIALIZED) + set(USE_NO_MAYBE_UNINITIALIZED 1) + else() + # Only recent versions of gcc make the distinction between -Wuninitialized + # and -Wmaybe-uninitialized. If -Wmaybe-uninitialized isn't supported, just + # turn off all uninitialized use warnings. + check_cxx_compiler_flag("-Wuninitialized" HAS_UNINITIALIZED) + set(USE_NO_UNINITIALIZED ${HAS_UNINITIALIZED}) + endif() +endif() + include(GetHostTriple) get_host_triple(LLVM_HOST_TRIPLE) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 43ee9a08b27f..f0b31ce65385 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -157,12 +157,7 @@ endmacro(add_llvm_external_project) # Generic support for adding a unittest. function(add_unittest test_suite test_name) - if (CMAKE_BUILD_TYPE) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY - ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}) - else() - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - endif() + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) if( NOT LLVM_BUILD_TESTS ) set(EXCLUDE_FROM_ALL ON) endif() @@ -239,8 +234,8 @@ function(configure_lit_site_cfg input output) set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}) set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR}) - set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s") - set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/%(build_config)s") + set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_mode)s") + set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/%(build_mode)s") set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE}) set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED}) set(SHLIBPATH_VAR ${SHLIBPATH_VAR}) @@ -251,8 +246,8 @@ function(configure_lit_site_cfg input output) set(ENABLE_ASSERTIONS "0") endif() - set(HOST_OS ${CMAKE_HOST_SYSTEM_NAME}) - set(HOST_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR}) + set(HOST_OS ${CMAKE_SYSTEM_NAME}) + set(HOST_ARCH 
${CMAKE_SYSTEM_PROCESSOR}) configure_file(${input} ${output} @ONLY) endfunction() @@ -266,18 +261,23 @@ function(add_lit_target target comment) set(LIT_COMMAND ${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py - --param build_config=${CMAKE_CFG_INTDIR} - --param build_mode=${RUNTIME_BUILD_MODE} + --param build_mode=${CMAKE_CFG_INTDIR} ${LIT_ARGS} ) foreach(param ${ARG_PARAMS}) list(APPEND LIT_COMMAND --param ${param}) endforeach() - add_custom_target(${target} - COMMAND ${LIT_COMMAND} ${ARG_DEFAULT_ARGS} - COMMENT "${comment}" - ) - add_dependencies(${target} ${ARG_DEPENDS}) + if( ARG_DEPENDS ) + add_custom_target(${target} + COMMAND ${LIT_COMMAND} ${ARG_DEFAULT_ARGS} + COMMENT "${comment}" + ) + add_dependencies(${target} ${ARG_DEPENDS}) + else() + add_custom_target(${target} + COMMAND cmake -E echo "${target} does nothing, no tools built.") + message(STATUS "${target} does nothing.") + endif() endfunction() # A function to add a set of lit test suites to be driven through 'check-*' targets. diff --git a/cmake/modules/GetSVN.cmake b/cmake/modules/GetSVN.cmake new file mode 100644 index 000000000000..acccc12a94e8 --- /dev/null +++ b/cmake/modules/GetSVN.cmake @@ -0,0 +1,25 @@ +# CMake project that writes Subversion revision information to a header. +# +# Input variables: +# FIRST_SOURCE_DIR - First source directory +# FIRST_REPOSITORY - The macro to define to the first revision number. +# SECOND_SOURCE_DIR - Second source directory +# SECOND_REPOSITORY - The macro to define to the second revision number. +# HEADER_FILE - The header file to write +include(FindSubversion) +if (Subversion_FOUND AND EXISTS "${FIRST_SOURCE_DIR}/.svn") + # Repository information for the first repository. + Subversion_WC_INFO(${FIRST_SOURCE_DIR} MY) + file(WRITE ${HEADER_FILE}.txt "#define ${FIRST_REPOSITORY} \"${MY_WC_REVISION}\"\n") + + # Repository information for the second repository. 
+ if (EXISTS "${SECOND_SOURCE_DIR}/.svn") + Subversion_WC_INFO(${SECOND_SOURCE_DIR} MY) + file(APPEND ${HEADER_FILE}.txt + "#define ${SECOND_REPOSITORY} \"${MY_WC_REVISION}\"\n") + endif () + + # Copy the file only if it has changed. + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${HEADER_FILE}.txt ${HEADER_FILE}) +endif() diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index b5f96e8f7114..4e59a3e3d906 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -3,6 +3,8 @@ # selections. include(AddLLVMDefinitions) +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) if( CMAKE_COMPILER_IS_GNUCXX ) set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) @@ -10,20 +12,6 @@ elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) endif() -# Run-time build mode; It is used for unittests. -if(MSVC_IDE) - # Expect "$(Configuration)", "$(OutDir)", etc. - # It is expanded by msbuild or similar. - set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}") -elseif(NOT CMAKE_BUILD_TYPE STREQUAL "") - # Expect "Release" "Debug", etc. - # Or unittests could not run. - set(RUNTIME_BUILD_MODE ${CMAKE_BUILD_TYPE}) -else() - # It might be "." - set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}") -endif() - if( LLVM_ENABLE_ASSERTIONS ) # MSVC doesn't like _DEBUG on release builds. See PR 4379. 
if( NOT MSVC ) @@ -71,6 +59,39 @@ else(WIN32) endif(UNIX) endif(WIN32) +function(add_flag_or_print_warning flag) + check_c_compiler_flag(${flag} C_SUPPORTS_FLAG) + check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG) + if (C_SUPPORTS_FLAG AND CXX_SUPPORTS_FLAG) + message(STATUS "Building with ${flag}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) + else() + message(WARNING "${flag} is not supported.") + endif() +endfunction() + +function(append value) + foreach(variable ${ARGN}) + set(${variable} "${${variable}} ${value}" PARENT_SCOPE) + endforeach(variable) +endfunction() + +function(append_if condition value) + if (${condition}) + foreach(variable ${ARGN}) + set(${variable} "${${variable}} ${value}" PARENT_SCOPE) + endforeach(variable) + endif() +endfunction() + +macro(add_flag_if_supported flag) + check_c_compiler_flag(${flag} C_SUPPORTS_FLAG) + append_if(C_SUPPORTS_FLAG "${flag}" CMAKE_C_FLAGS) + check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG) + append_if(CXX_SUPPORTS_FLAG "${flag}" CMAKE_CXX_FLAGS) +endmacro() + if( LLVM_ENABLE_PIC ) if( XCODE ) # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't @@ -79,24 +100,14 @@ if( LLVM_ENABLE_PIC ) elseif( WIN32 OR CYGWIN) # On Windows all code is PIC. MinGW warns if -fPIC is used. else() - include(CheckCXXCompilerFlag) - check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG) - if( SUPPORTS_FPIC_FLAG ) - message(STATUS "Building with -fPIC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") - else( SUPPORTS_FPIC_FLAG ) - message(WARNING "-fPIC not supported.") - endif() + add_flag_or_print_warning("-fPIC") if( WIN32 OR CYGWIN) # MinGW warns if -fvisibility-inlines-hidden is used. 
else() check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) - if( SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") - endif() - endif() + append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS) + endif() endif() endif() @@ -168,6 +179,7 @@ if( MSVC ) -wd4551 # Suppress 'function call missing argument list' -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible' -wd4715 # Suppress ''function' : not all control paths return a value' + -wd4722 # Suppress ''function' : destructor never returns, potential memory leak' -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)' # Promoted warnings. @@ -175,7 +187,6 @@ if( MSVC ) # Promoted warnings to errors. -we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error. - -we4239 # Promote 'nonstandard extension used : 'token' : conversion from 'type' to 'type'' to error. ) # Enable warnings @@ -190,20 +201,67 @@ if( MSVC ) endif (LLVM_ENABLE_WERROR) elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) if (LLVM_ENABLE_WARNINGS) - add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings ) - if (LLVM_ENABLE_PEDANTIC) - add_llvm_definitions( -pedantic -Wno-long-long ) - endif (LLVM_ENABLE_PEDANTIC) - check_cxx_compiler_flag("-Werror -Wcovered-switch-default" SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG) - if( SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG ) - add_llvm_definitions( -Wcovered-switch-default ) + append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + + # Turn off missing field initializer warnings for gcc to avoid noise from + # false positives with empty {}. Turn them on otherwise (they're off by + # default for clang). 
+ check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) + if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) + if (CMAKE_COMPILER_IS_GNUCXX) + append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + else() + append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() endif() + + append_if(LLVM_ENABLE_PEDANTIC "-pedantic -Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + check_cxx_compiler_flag("-Werror -Wcovered-switch-default" CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG) + append_if(CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_CXX_FLAGS) + check_c_compiler_flag("-Werror -Wcovered-switch-default" C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG) + append_if(C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_C_FLAGS) + append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS) + append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS) + check_cxx_compiler_flag("-Werror -Wnon-virtual-dtor" CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG) + append_if(CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) endif (LLVM_ENABLE_WARNINGS) if (LLVM_ENABLE_WERROR) add_llvm_definitions( -Werror ) endif (LLVM_ENABLE_WERROR) endif( MSVC ) +macro(append_common_sanitizer_flags) + # Append -fno-omit-frame-pointer and turn on debug info to get better + # stack traces. + add_flag_if_supported("-fno-omit-frame-pointer") + if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND + NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO") + add_flag_if_supported("-gline-tables-only") + endif() +endmacro() + +# Turn on sanitizers if necessary. 
+if(LLVM_USE_SANITIZER) + if (LLVM_ON_UNIX) + if (LLVM_USE_SANITIZER STREQUAL "Address") + append_common_sanitizer_flags() + add_flag_or_print_warning("-fsanitize=address") + elseif (LLVM_USE_SANITIZER MATCHES "Memory(WithOrigins)?") + append_common_sanitizer_flags() + add_flag_or_print_warning("-fsanitize=memory") + # -pie is required for MSan. + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") + if(LLVM_USE_SANITIZER STREQUAL "MemoryWithOrigins") + add_flag_or_print_warning("-fsanitize-memory-track-origins") + endif() + else() + message(WARNING "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") + endif() + else() + message(WARNING "LLVM_USE_SANITIZER is not supported on this platform.") + endif() +endif() + add_llvm_definitions( -D__STDC_CONSTANT_MACROS ) add_llvm_definitions( -D__STDC_FORMAT_MACROS ) add_llvm_definitions( -D__STDC_LIMIT_MACROS ) diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake index 574335c49d0e..163401c857c5 100755 --- a/cmake/modules/LLVM-Config.cmake +++ b/cmake/modules/LLVM-Config.cmake @@ -4,11 +4,14 @@ function(get_system_libs return_var) if( MINGW ) set(system_libs ${system_libs} imagehlp psapi) elseif( CMAKE_HOST_UNIX ) + if( HAVE_LIBRT ) + set(system_libs ${system_libs} rt) + endif() if( HAVE_LIBDL ) - set(system_libs ${system_libs} ${CMAKE_DL_LIBS}) + set(system_libs ${system_libs} ${CMAKE_DL_LIBS}) endif() if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD ) - set(system_libs ${system_libs} pthread) + set(system_libs ${system_libs} pthread) endif() endif( MINGW ) endif( NOT MSVC ) diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake index d6a2ae5f45f5..26314d4126d2 100644 --- a/cmake/modules/VersionFromVCS.cmake +++ b/cmake/modules/VersionFromVCS.cmake @@ -20,49 +20,51 @@ function(add_version_info_from_vcs VERS) elseif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git ) set(result "${result}git") # Try to get a ref-id - find_program(git_executable NAMES git 
git.exe git.cmd) - if( git_executable ) - set(is_git_svn_rev_exact false) - execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if( git_result EQUAL 0 ) - string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output}) - string(LENGTH "${git_svn_rev}" rev_length) - math(EXPR rev_length "${rev_length}-1") - string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number) - set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE) - set(git_svn_rev "-svn-${git_svn_rev}") - - # Determine if the HEAD points directly at a subversion revision. - execute_process(COMMAND ${git_executable} svn find-rev HEAD - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) + if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git/svn ) + find_program(git_executable NAMES git git.exe git.cmd) + if( git_executable ) + set(is_git_svn_rev_exact false) + execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output) if( git_result EQUAL 0 ) - string(STRIP "${git_output}" git_head_svn_rev_number) - if( git_head_svn_rev_number EQUAL git_svn_rev_number ) - set(is_git_svn_rev_exact true) + string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output}) + string(LENGTH "${git_svn_rev}" rev_length) + math(EXPR rev_length "${rev_length}-1") + string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number) + set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE) + set(git_svn_rev "-svn-${git_svn_rev}") + + # Determine if the HEAD points directly at a subversion revision. 
+ execute_process(COMMAND ${git_executable} svn find-rev HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output) + if( git_result EQUAL 0 ) + string(STRIP "${git_output}" git_head_svn_rev_number) + if( git_head_svn_rev_number EQUAL git_svn_rev_number ) + set(is_git_svn_rev_exact true) + endif() endif() + else() + set(git_svn_rev "") + endif() + execute_process(COMMAND + ${git_executable} rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output) + if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact ) + string(STRIP "${git_output}" git_ref_id) + set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE) + set(result "${result}${git_svn_rev}-${git_ref_id}") + else() + set(result "${result}${git_svn_rev}") endif() - else() - set(git_svn_rev "") - endif() - execute_process(COMMAND - ${git_executable} rev-parse --short HEAD - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact ) - string(STRIP "${git_output}" git_ref_id) - set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE) - set(result "${result}${git_svn_rev}-${git_ref_id}") - else() - set(result "${result}${git_svn_rev}") endif() endif() endif() diff --git a/configure b/configure index 4fa070549196..e70b13100a70 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for LLVM 3.2svn. +# Generated by GNU Autoconf 2.60 for LLVM 3.3svn. # # Report bugs to . # @@ -9,7 +9,7 @@ # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # -# Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign. +# Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign. 
## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## @@ -561,11 +561,11 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='LLVM' PACKAGE_TARNAME='llvm' -PACKAGE_VERSION='3.2svn' -PACKAGE_STRING='LLVM 3.2svn' +PACKAGE_VERSION='3.3svn' +PACKAGE_STRING='LLVM 3.3svn' PACKAGE_BUGREPORT='http://llvm.org/bugs/' -ac_unique_file="lib/VMCore/Module.cpp" +ac_unique_file="lib/IR/Module.cpp" # Factoring default headers for most tests. ac_includes_default="\ #include @@ -685,6 +685,9 @@ BUILD_CXX CVSBUILD ENABLE_LIBCPP ENABLE_CXX11 +ENABLE_CLANG_ARCMT +ENABLE_CLANG_STATIC_ANALYZER +ENABLE_CLANG_REWRITER ENABLE_OPTIMIZED ENABLE_PROFILING DISABLE_ASSERTIONS @@ -764,6 +767,9 @@ LIBADD_DL NO_VARIADIC_MACROS NO_MISSING_FIELD_INITIALIZERS COVERED_SWITCH_DEFAULT +NO_MAYBE_UNINITIALIZED +NO_UNINITIALIZED +PYTHON USE_UDIS86 USE_OPROFILE USE_INTEL_JITEVENTS @@ -1321,7 +1327,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures LLVM 3.2svn to adapt to many kinds of systems. +\`configure' configures LLVM 3.3svn to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1387,7 +1393,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of LLVM 3.2svn:";; + short | recursive ) echo "Configuration of LLVM 3.3svn:";; esac cat <<\_ACEOF @@ -1397,6 +1403,11 @@ Optional Features: --enable-polly Use polly if available (default is YES) --enable-libcpp Use libc++ if available (default is NO) --enable-cxx11 Use c++11 if available (default is NO) + --enable-clang-arcmt Enable building of clang ARCMT (default is YES) + --enable-clang-static-analyzer + Enable building of clang Static Analyzer (default is + YES) + --enable-clang-rewriter Enable building of clang rewriter (default is YES) --enable-optimized Compile with optimizations enabled (default is NO) --enable-profiling Compile with profiling enabled (default is NO) --enable-assertions Compile with assertion checks enabled (default is @@ -1427,7 +1438,7 @@ Optional Features: YES) --enable-targets Build specific host targets: all or target1,target2,... Valid targets are: host, x86, - x86_64, sparc, powerpc, arm, mips, spu, hexagon, + x86_64, sparc, powerpc, arm, aarch64, mips, hexagon, xcore, msp430, nvptx, and cpp (default=all) --enable-experimental-targets Build experimental host targets: disable or @@ -1459,6 +1470,7 @@ Optional Packages: --with-bug-report-url Specify the URL where bug reports should be submitted (default=http://llvm.org/bugs/) --with-internal-prefix Installation directory for internal files + --with-python path to python --with-udis86= Use udis86 external x86 disassembler library --with-oprofile= Tell OProfile >= 0.9.4 how to symbolize JIT output @@ -1540,7 +1552,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -LLVM configure 3.2svn +LLVM configure 3.3svn generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1548,7 +1560,7 @@ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, This configure script is free 
software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. -Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign. +Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign. _ACEOF exit fi @@ -1556,7 +1568,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by LLVM $as_me 3.2svn, which was +It was created by LLVM $as_me 3.3svn, which was generated by GNU Autoconf 2.60. Invocation command line was $ $0 $@ @@ -1916,11 +1928,11 @@ _ACEOF cat >>confdefs.h <<\_ACEOF -#define LLVM_VERSION_MINOR 2 +#define LLVM_VERSION_MINOR 3 _ACEOF -LLVM_COPYRIGHT="Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign." +LLVM_COPYRIGHT="Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign." @@ -1968,6 +1980,9 @@ echo "$as_me: error: Already configured in ${srcdir}" >&2;} fi fi +${CFLAGS=} +${CXXFLAGS=} + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -3463,6 +3478,98 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test "$CXX" = "clang++" ; then + { echo "$as_me:$LINENO: checking whether clang works" >&5 +echo $ECHO_N "checking whether clang works... $ECHO_C" >&6; } + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +#if __has_include () +#include +#endif +#if __has_include () +#include +#endif + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + { { echo "$as_me:$LINENO: error: Selected compiler could not find or parse C++ standard library headers. Rerun with CC=c-compiler CXX=c++-compiler ./configure ..." >&5 +echo "$as_me: error: Selected compiler could not find or parse C++ standard library headers. Rerun with CC=c-compiler CXX=c++-compiler ./configure ..." 
>&2;} + { (exit 1); exit 1; }; } + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi + if test -d ${srcdir}/projects/llvm-gcc ; then @@ -3904,6 +4011,7 @@ else sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm*-*) llvm_cv_target_arch="ARM" ;; + aarch64*-*) llvm_cv_target_arch="AArch64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -3937,6 +4045,7 @@ case $host in sparc*-*) host_arch="Sparc" ;; powerpc*-*) host_arch="PowerPC" ;; arm*-*) host_arch="ARM" ;; + aarch64*-*) host_arch="AArch64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -5047,6 +5156,77 @@ echo "$as_me: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\"" { (exit 1); exit 1; }; } ;; esac +clang_arcmt="yes" +# Check whether --enable-clang-arcmt was given. +if test "${enable_clang_arcmt+set}" = set; then + enableval=$enable_clang_arcmt; clang_arcmt="$enableval" +else + enableval="yes" +fi + +case "$enableval" in + yes) ENABLE_CLANG_ARCMT=1 + ;; + no) ENABLE_CLANG_ARCMT=0 + ;; + default) ENABLE_CLANG_ARCMT=1 +;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-clang-arcmt. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-clang-arcmt. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +clang_static_analyzer="yes" +# Check whether --enable-clang-static-analyzer was given. 
+if test "${enable_clang_static_analyzer+set}" = set; then + enableval=$enable_clang_static_analyzer; clang_static_analyzer="$enableval" +else + enableval="yes" +fi + +case "$enableval" in + yes) ENABLE_CLANG_STATIC_ANALYZER=1 + ;; + no) ENABLE_CLANG_STATIC_ANALYZER=0 + ;; + default) ENABLE_CLANG_STATIC_ANALYZER=1 +;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-clang-static-analyzer. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-clang-static-analyzer. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +# Check whether --enable-clang-rewriter was given. +if test "${enable_clang_rewriter+set}" = set; then + enableval=$enable_clang_rewriter; +else + enableval="yes" +fi + +case "$enableval" in + yes) ENABLE_CLANG_REWRITER=1 + ;; + no) + if test ${clang_arcmt} != "no" ; then + { { echo "$as_me:$LINENO: error: Cannot enable clang ARC Migration Tool while disabling rewriter." >&5 +echo "$as_me: error: Cannot enable clang ARC Migration Tool while disabling rewriter." >&2;} + { (exit 1); exit 1; }; } + fi + if test ${clang_static_analyzer} != "no" ; then + { { echo "$as_me:$LINENO: error: Cannot enable clang static analyzer while disabling rewriter." >&5 +echo "$as_me: error: Cannot enable clang static analyzer while disabling rewriter." >&2;} + { (exit 1); exit 1; }; } + fi + ENABLE_CLANG_REWRITER=0 + + ;; + default) ENABLE_CLANG_REWRITER=1 +;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-clang-rewriter. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-clang-rewriter. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + # Check whether --enable-optimized was given. 
if test "${enable_optimized+set}" = set; then enableval=$enable_optimized; @@ -5197,6 +5377,8 @@ else x86_64) TARGET_HAS_JIT=1 ;; ARM) TARGET_HAS_JIT=1 + ;; + AArch64) TARGET_HAS_JIT=0 ;; Mips) TARGET_HAS_JIT=1 ;; @@ -5419,19 +5601,19 @@ if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; - spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; @@ -5446,7 +5628,6 @@ case "$enableval" in ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; @@ -10316,7 +10497,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <conftest.$ac_ext <<_ACEOF -#if !defined(__GNUC__) || __GNUC__ < 3 -#error 
Unsupported GCC version -#endif + + /* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #if !defined(__GNUC__) || __GNUC__ < 3 + #error Unsupported GCC version + #endif + _ACEOF rm -f conftest.$ac_objext @@ -12071,14 +12260,112 @@ echo "${ECHO_T}ok" >&6; } { echo "$as_me:$LINENO: checking optional compiler flags" >&5 echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; } -NO_VARIADIC_MACROS=`$CXX -Werror -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` +NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` -NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` +NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` -{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&5 -echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&6; } + +NO_UNINITIALIZED= +NO_MAYBE_UNINITIALIZED= +if test "$GXX" = "yes" +then + NO_MAYBE_UNINITIALIZED=`$CXX -Werror -Wmaybe-uninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-maybe-uninitialized` + + if test -z "$NO_MAYBE_UNINITIALIZED" + then + NO_UNINITIALIZED=`$CXX -Werror -Wuninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-uninitialized` + + fi +fi +{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&5 +echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT 
$NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&6; } + + +# Check whether --with-python was given. +if test "${with_python+set}" = set; then + withval=$with_python; PYTHON="$withval" +fi + + +if test -n "$PYTHON" && test -x "$PYTHON" ; then + { echo "$as_me:$LINENO: checking for python" >&5 +echo $ECHO_N "checking for python... $ECHO_C" >&6; } + { echo "$as_me:$LINENO: result: user defined: $with_python" >&5 +echo "${ECHO_T}user defined: $with_python" >&6; } +else + if test -n "$PYTHON" ; then + { echo "$as_me:$LINENO: WARNING: specified python ($PYTHON) is not usable, searching path" >&5 +echo "$as_me: WARNING: specified python ($PYTHON) is not usable, searching path" >&2;} + fi + + # Extract the first word of "python python2 python26", so it can be a program name with args. +set dummy python python2 python26; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_PYTHON+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $PYTHON in + [\\/]* | ?:[\\/]*) + ac_cv_path_PYTHON="$PYTHON" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_PYTHON="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_PYTHON" && ac_cv_path_PYTHON="{ echo "$as_me:$LINENO: result: not found" >&5 +echo "${ECHO_T}not found" >&6; } + { { echo "$as_me:$LINENO: error: could not find python 2.5 or higher" >&5 +echo "$as_me: error: could not find python 2.5 or higher" >&2;} + { (exit 1); exit 1; }; }" + ;; +esac +fi +PYTHON=$ac_cv_path_PYTHON +if test -n "$PYTHON"; then + { echo "$as_me:$LINENO: result: $PYTHON" >&5 +echo "${ECHO_T}$PYTHON" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi + +{ echo "$as_me:$LINENO: checking for python >= 2.5" >&5 +echo $ECHO_N "checking for python >= 2.5... $ECHO_C" >&6; } +ac_python_version=`$PYTHON -c 'import sys; print sys.version.split()[0]'` +ac_python_version_major=`echo $ac_python_version | cut -d'.' -f1` +ac_python_version_minor=`echo $ac_python_version | cut -d'.' -f2` +ac_python_version_patch=`echo $ac_python_version | cut -d'.' -f3` +if test "$ac_python_version_major" -eq "2" \ + && test "$ac_python_version_minor" -ge "5" ; then + { echo "$as_me:$LINENO: result: $PYTHON ($ac_python_version)" >&5 +echo "${ECHO_T}$PYTHON ($ac_python_version)" >&6; } +else + { echo "$as_me:$LINENO: result: not found" >&5 +echo "${ECHO_T}not found" >&6; } + { { echo "$as_me:$LINENO: error: found python $ac_python_version ($PYTHON); required >= 2.5 +See \`config.log' for more details." >&5 +echo "$as_me: error: found python $ac_python_version ($PYTHON); required >= 2.5 +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; } +fi @@ -12440,6 +12727,106 @@ echo "$as_me: WARNING: dlopen() not found - disabling plugin support" >&2;} fi +{ echo "$as_me:$LINENO: checking for library containing clock_gettime" >&5 +echo $ECHO_N "checking for library containing clock_gettime... $ECHO_C" >&6; } +if test "${ac_cv_search_clock_gettime+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char clock_gettime (); +int +main () +{ +return clock_gettime (); + ; + return 0; +} +_ACEOF +for ac_lib in '' rt; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_clock_gettime=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_clock_gettime+set}" = set; then + break +fi +done +if test "${ac_cv_search_clock_gettime+set}" = set; then + : +else + ac_cv_search_clock_gettime=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_clock_gettime" >&5 +echo "${ECHO_T}$ac_cv_search_clock_gettime" >&6; } +ac_res=$ac_cv_search_clock_gettime +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + if test "$llvm_cv_enable_libffi" = "yes" ; then { echo "$as_me:$LINENO: checking for library containing ffi_call" >&5 echo $ECHO_N "checking for library containing ffi_call... $ECHO_C" >&6; } @@ -15557,6 +15944,178 @@ fi done +{ echo "$as_me:$LINENO: checking whether FE_ALL_EXCEPT is declared" >&5 +echo $ECHO_N "checking whether FE_ALL_EXCEPT is declared... $ECHO_C" >&6; } +if test "${ac_cv_have_decl_FE_ALL_EXCEPT+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include + +int +main () +{ +#ifndef FE_ALL_EXCEPT + char *p = (char *) FE_ALL_EXCEPT; + return !p; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_FE_ALL_EXCEPT=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_have_decl_FE_ALL_EXCEPT=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_ALL_EXCEPT" >&5 +echo "${ECHO_T}$ac_cv_have_decl_FE_ALL_EXCEPT" >&6; } +if test $ac_cv_have_decl_FE_ALL_EXCEPT = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_ALL_EXCEPT 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_ALL_EXCEPT 0 +_ACEOF + + +fi +{ echo "$as_me:$LINENO: checking whether FE_INEXACT is declared" >&5 +echo $ECHO_N "checking whether FE_INEXACT is declared... 
$ECHO_C" >&6; } +if test "${ac_cv_have_decl_FE_INEXACT+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +int +main () +{ +#ifndef FE_INEXACT + char *p = (char *) FE_INEXACT; + return !p; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_FE_INEXACT=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_have_decl_FE_INEXACT=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_INEXACT" >&5 +echo "${ECHO_T}$ac_cv_have_decl_FE_INEXACT" >&6; } +if test $ac_cv_have_decl_FE_INEXACT = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_INEXACT 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_INEXACT 0 +_ACEOF + + +fi + + if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then for ac_header in pthread.h @@ -16082,16 +16641,19 @@ done { echo "$as_me:$LINENO: checking __crashreporter_info__" >&5 echo $ECHO_N "checking __crashreporter_info__... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ + + /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -extern const char *__crashreporter_info__; - int main() { - __crashreporter_info__ = "test"; - return 0; - } + + extern const char *__crashreporter_info__; + int main() { + __crashreporter_info__ = "test"; + return 0; + } + _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext @@ -16128,6 +16690,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then + { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } @@ -16135,17 +16698,20 @@ cat >>confdefs.h <<\_ACEOF #define HAVE_CRASHREPORTER_INFO 1 _ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 + + { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } cat >>confdefs.h <<\_ACEOF #define HAVE_CRASHREPORTER_INFO 0 _ACEOF + fi rm -f core conftest.err conftest.$ac_objext \ @@ -16153,7 +16719,6 @@ rm -f core conftest.err conftest.$ac_objext \ - { echo "$as_me:$LINENO: checking for HUGE_VAL sanity" >&5 echo $ECHO_N "checking for HUGE_VAL sanity... $ECHO_C" >&6; } if test "${ac_cv_huge_val_sanity+set}" = set; then @@ -16956,6 +17521,120 @@ done +for ac_func in log log2 log10 exp exp2 +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + + + for ac_func in getpagesize getrusage getrlimit setrlimit gettimeofday do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` @@ -17728,18 +18407,20 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in " >&5 + + { echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in " >&5 echo $ECHO_N "checking for srand48/lrand48/drand48 in ... $ECHO_C" >&6; } if test "${ac_cv_func_rand48+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -17797,12 +18478,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5 echo "${ECHO_T}$ac_cv_func_rand48" >&6; } @@ -19272,10 +19954,19 @@ if test "$llvm_cv_os_type" = "MingW" ; then { echo "$as_me:$LINENO: checking whether EnumerateLoadedModules() accepts new decl" >&5 echo $ECHO_N "checking whether EnumerateLoadedModules() accepts new decl... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF -#include -#include -extern void foo(PENUMLOADED_MODULES_CALLBACK); -extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID)); + + /* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + #include + #include + extern void foo(PENUMLOADED_MODULES_CALLBACK); + extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID)); + + _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" @@ -19336,18 +20027,20 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for isnan in " >&5 + + { echo "$as_me:$LINENO: checking for isnan in " >&5 echo $ECHO_N "checking for isnan in ... $ECHO_C" >&6; } if test "${ac_cv_func_isnan_in_math_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -19405,12 +20098,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_math_h" >&5 echo "${ECHO_T}$ac_cv_func_isnan_in_math_h" >&6; } @@ -19424,18 +20118,20 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for isnan in " >&5 + + { echo "$as_me:$LINENO: checking for isnan in " >&5 echo $ECHO_N "checking for isnan in ... $ECHO_C" >&6; } if test "${ac_cv_func_isnan_in_cmath+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -19493,12 +20189,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_cmath" >&5 echo "${ECHO_T}$ac_cv_func_isnan_in_cmath" >&6; } @@ -19511,18 +20208,20 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for std::isnan in " >&5 + + { echo "$as_me:$LINENO: checking for std::isnan in " >&5 echo $ECHO_N "checking for std::isnan in ... 
$ECHO_C" >&6; } if test "${ac_cv_func_std_isnan_in_cmath+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -19580,12 +20279,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_std_isnan_in_cmath" >&5 echo "${ECHO_T}$ac_cv_func_std_isnan_in_cmath" >&6; } @@ -19599,18 +20299,21 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for isinf in " >&5 + + + { echo "$as_me:$LINENO: checking for isinf in " >&5 echo $ECHO_N "checking for isinf in ... $ECHO_C" >&6; } if test "${ac_cv_func_isinf_in_math_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -19668,12 +20371,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_math_h" >&5 echo "${ECHO_T}$ac_cv_func_isinf_in_math_h" >&6; } @@ -19686,18 +20390,20 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for isinf in " >&5 + + { echo "$as_me:$LINENO: checking for isinf in " >&5 echo $ECHO_N "checking for isinf in ... $ECHO_C" >&6; } if test "${ac_cv_func_isinf_in_cmath+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -19755,12 +20461,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_cmath" >&5 echo "${ECHO_T}$ac_cv_func_isinf_in_cmath" >&6; } @@ -19773,18 +20480,20 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for std::isinf in " >&5 + + { echo "$as_me:$LINENO: checking for std::isinf in " >&5 echo $ECHO_N "checking for std::isinf in ... 
$ECHO_C" >&6; } if test "${ac_cv_func_std_isinf_in_cmath+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -19842,12 +20551,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_std_isinf_in_cmath" >&5 echo "${ECHO_T}$ac_cv_func_std_isinf_in_cmath" >&6; } @@ -19860,18 +20570,20 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for finite in " >&5 + + { echo "$as_me:$LINENO: checking for finite in " >&5 echo $ECHO_N "checking for finite in ... $ECHO_C" >&6; } if test "${ac_cv_func_finite_in_ieeefp_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - ac_ext=cpp + + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -19929,12 +20641,13 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + fi { echo "$as_me:$LINENO: result: $ac_cv_func_finite_in_ieeefp_h" >&5 echo "${ECHO_T}$ac_cv_func_finite_in_ieeefp_h" >&6; } @@ -20551,19 +21264,22 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex ac_compiler_gnu=$ac_cv_cxx_compiler_gnu cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ + + /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -int main() { - volatile unsigned long val = 1; - __sync_synchronize(); - __sync_val_compare_and_swap(&val, 1, 0); - __sync_add_and_fetch(&val, 1); - __sync_sub_and_fetch(&val, 1); - return 0; - } + + int main() { + volatile unsigned long val = 1; + __sync_synchronize(); + __sync_val_compare_and_swap(&val, 1, 0); + __sync_add_and_fetch(&val, 1); + __sync_sub_and_fetch(&val, 1); + return 0; + } + _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext @@ -20580,7 +21296,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; @@ -20600,11 +21316,6 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } @@ -20613,11 +21324,13 @@ cat >>confdefs.h <<\_ACEOF #define LLVM_HAS_ATOMICS 1 _ACEOF + else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { echo "$as_me:$LINENO: result: no" >&5 + + { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } cat >>confdefs.h <<\_ACEOF @@ -20626,10 +21339,17 @@ _ACEOF { echo "$as_me:$LINENO: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&5 echo "$as_me: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&2;} + fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then @@ -21574,7 +22294,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by LLVM $as_me 3.2svn, which was +This file was extended by LLVM $as_me 3.3svn, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -21627,7 +22347,7 @@ Report bugs to ." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -LLVM config.status 3.2svn +LLVM config.status 3.3svn configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" @@ -21912,6 +22632,9 @@ BUILD_CXX!$BUILD_CXX$ac_delim CVSBUILD!$CVSBUILD$ac_delim ENABLE_LIBCPP!$ENABLE_LIBCPP$ac_delim ENABLE_CXX11!$ENABLE_CXX11$ac_delim +ENABLE_CLANG_ARCMT!$ENABLE_CLANG_ARCMT$ac_delim +ENABLE_CLANG_STATIC_ANALYZER!$ENABLE_CLANG_STATIC_ANALYZER$ac_delim +ENABLE_CLANG_REWRITER!$ENABLE_CLANG_REWRITER$ac_delim ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim @@ -21923,9 +22646,6 @@ DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim JIT!$JIT$ac_delim TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim -ENABLE_DOCS!$ENABLE_DOCS$ac_delim -ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim -LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -21967,6 +22687,9 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +ENABLE_DOCS!$ENABLE_DOCS$ac_delim +ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim +LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim ENABLE_PIC!$ENABLE_PIC$ac_delim ENABLE_SHARED!$ENABLE_SHARED$ac_delim @@ -22032,6 +22755,9 @@ LIBADD_DL!$LIBADD_DL$ac_delim NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim +NO_MAYBE_UNINITIALIZED!$NO_MAYBE_UNINITIALIZED$ac_delim +NO_UNINITIALIZED!$NO_UNINITIALIZED$ac_delim +PYTHON!$PYTHON$ac_delim USE_UDIS86!$USE_UDIS86$ac_delim USE_OPROFILE!$USE_OPROFILE$ac_delim USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim @@ -22058,13 +22784,9 @@ ALL_BINDINGS!$ALL_BINDINGS$ac_delim 
OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim RPATH!$RPATH$ac_delim -RDYNAMIC!$RDYNAMIC$ac_delim -program_prefix!$program_prefix$ac_delim -LIBOBJS!$LIBOBJS$ac_delim -LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 @@ -22083,6 +22805,51 @@ fi cat >>$CONFIG_STATUS <<_ACEOF cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +_ACEOF +sed ' +s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g +s/^/s,@/; s/!/@,|#_!!_#|/ +:n +t n +s/'"$ac_delim"'$/,g/; t +s/$/\\/; p +N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n +' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF +CEOF$ac_eof +_ACEOF + + +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + cat >conf$$subs.sed <<_ACEOF +RDYNAMIC!$RDYNAMIC$ac_delim +program_prefix!$program_prefix$ac_delim +LIBOBJS!$LIBOBJS$ac_delim +LTLIBOBJS!$LTLIBOBJS$ac_delim +_ACEOF + + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 4; then + break + elif $ac_last_try; then + { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done + +ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` +if test -n "$ac_eof"; then + ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` + ac_eof=`expr $ac_eof + 1` +fi + +cat >>$CONFIG_STATUS <<_ACEOF +cat >"\$tmp/subs-3.sed" <<\CEOF$ac_eof /@[a-zA-Z_][a-zA-Z_0-9]*@/!b end _ACEOF sed ' @@ -22345,7 +23112,7 @@ s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t s&@INSTALL@&$ac_INSTALL&;t t $ac_datarootdir_hack -" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out +" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" | sed -f "$tmp/subs-3.sed" >$tmp/out test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst index fdaec89cdf6b..712d57d14b12 100644 --- a/docs/AliasAnalysis.rst +++ b/docs/AliasAnalysis.rst @@ -1,5 +1,3 @@ -.. _alias_analysis: - ================================== LLVM Alias Analysis Infrastructure ================================== @@ -205,7 +203,7 @@ look at the `various alias analysis implementations`_ included with LLVM. Different Pass styles --------------------- -The first step to determining what type of `LLVM pass `_ +The first step to determining what type of :doc:`LLVM pass ` you need to use for your Alias Analysis. As is the case with most other analyses and transformations, the answer should be fairly obvious from what type of problem you are trying to solve: @@ -253,25 +251,24 @@ Interfaces which may be specified All of the `AliasAnalysis `__ virtual methods -default to providing `chaining`_ to another alias analysis implementation, which -ends up returning conservatively correct information (returning "May" Alias and -"Mod/Ref" for alias and mod/ref queries respectively). Depending on the -capabilities of the analysis you are implementing, you just override the -interfaces you can improve. 
+default to providing :ref:`chaining ` to another alias +analysis implementation, which ends up returning conservatively correct +information (returning "May" Alias and "Mod/Ref" for alias and mod/ref queries +respectively). Depending on the capabilities of the analysis you are +implementing, you just override the interfaces you can improve. -.. _chaining: -.. _chain: +.. _aliasanalysis-chaining: ``AliasAnalysis`` chaining behavior ----------------------------------- -With only one special exception (the `no-aa`_ pass) every alias analysis pass -chains to another alias analysis implementation (for example, the user can -specify "``-basicaa -ds-aa -licm``" to get the maximum benefit from both alias -analyses). The alias analysis class automatically takes care of most of this -for methods that you don't override. For methods that you do override, in code -paths that return a conservative MayAlias or Mod/Ref result, simply return -whatever the superclass computes. For example: +With only one special exception (the :ref:`-no-aa ` pass) +every alias analysis pass chains to another alias analysis implementation (for +example, the user can specify "``-basicaa -ds-aa -licm``" to get the maximum +benefit from both alias analyses). The alias analysis class automatically +takes care of most of this for methods that you don't override. For methods +that you do override, in code paths that return a conservative MayAlias or +Mod/Ref result, simply return whatever the superclass computes. For example: .. code-block:: c++ @@ -504,11 +501,11 @@ Available ``AliasAnalysis`` implementations ------------------------------------------- This section lists the various implementations of the ``AliasAnalysis`` -interface. With the exception of the `-no-aa`_ implementation, all of these -`chain`_ to other alias analysis implementations. +interface. With the exception of the :ref:`-no-aa ` +implementation, all of these :ref:`chain ` to other +alias analysis implementations. -.. _no-aa: -.. 
_-no-aa: +.. _aliasanalysis-no-aa: The ``-no-aa`` pass ^^^^^^^^^^^^^^^^^^^ diff --git a/docs/Atomics.rst b/docs/Atomics.rst index 1bca53e2b17e..705d73fbaba4 100644 --- a/docs/Atomics.rst +++ b/docs/Atomics.rst @@ -1,5 +1,3 @@ -.. _atomics: - ============================================== LLVM Atomic Instructions and Concurrency Guide ============================================== diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index bd26f7b1502e..c83b6c1801cd 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -1,5 +1,3 @@ -.. _bitcode_format: - .. role:: raw-html(raw) :format: html @@ -54,8 +52,8 @@ structure. This structure consists of the following concepts: * Abbreviations, which specify compression optimizations for the file. -Note that the `llvm-bcanalyzer `_ tool -can be used to dump and inspect arbitrary bitstreams, which is very useful for +Note that the :doc:`llvm-bcanalyzer ` tool can be +used to dump and inspect arbitrary bitstreams, which is very useful for understanding the encoding. .. _magic number: diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst index f0df971f8745..71ecd34c8216 100644 --- a/docs/BranchWeightMetadata.rst +++ b/docs/BranchWeightMetadata.rst @@ -1,5 +1,3 @@ -.. _branch_weight: - =========================== LLVM Branch Weight Metadata =========================== @@ -27,8 +25,8 @@ Supported Instructions ``BranchInst`` ^^^^^^^^^^^^^^ -Metadata is only assign to the conditional branches. There are two extra -operarands, for the true and the false branch. +Metadata is only assigned to the conditional branches. There are two extra +operarands for the true and the false branch. .. code-block:: llvm @@ -41,8 +39,8 @@ operarands, for the true and the false branch. ``SwitchInst`` ^^^^^^^^^^^^^^ -Branch weights are assign to every case (including ``default`` case which is -always case #0). +Branch weights are assigned to every case (including the ``default`` case which +is always case #0). 
.. code-block:: llvm @@ -55,7 +53,7 @@ always case #0). ``IndirectBrInst`` ^^^^^^^^^^^^^^^^^^ -Branch weights are assign to every destination. +Branch weights are assigned to every destination. .. code-block:: llvm diff --git a/docs/Bugpoint.rst b/docs/Bugpoint.rst index 9ccf0cc2d9d6..1a5fc8c02764 100644 --- a/docs/Bugpoint.rst +++ b/docs/Bugpoint.rst @@ -1,5 +1,3 @@ -.. _bugpoint: - ==================================== LLVM bugpoint tool: design and usage ==================================== @@ -136,9 +134,9 @@ non-obvious ways. Here are some hints and tips: It is often useful to capture the output of the program to file. For example, in the C shell, you can run: - .. code-block:: bash + .. code-block:: console - bugpoint ... |& tee bugpoint.log + $ bugpoint ... |& tee bugpoint.log to get a copy of ``bugpoint``'s output in the file ``bugpoint.log``, as well as on your terminal. diff --git a/docs/CMake.rst b/docs/CMake.rst index 7f0420c4469f..fb081d7b98c6 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -1,5 +1,3 @@ -.. _building-with-cmake: - ======================== Building LLVM with CMake ======================== @@ -36,7 +34,7 @@ We use here the command-line, non-interactive CMake interface. #. Create a directory for containing the build. It is not supported to build LLVM on the source directory. cd to this directory: - .. code-block:: bash + .. code-block:: console $ mkdir mybuilddir $ cd mybuilddir @@ -44,7 +42,7 @@ We use here the command-line, non-interactive CMake interface. #. Execute this command on the shell replacing `path/to/llvm/source/root` with the path to the root of your LLVM source tree: - .. code-block:: bash + .. code-block:: console $ cmake path/to/llvm/source/root @@ -80,14 +78,14 @@ the corresponding *Generator* for creating files for your build tool. You can explicitly specify the generator with the command line option ``-G "Name of the generator"``. For knowing the available generators on your platform, execute -.. 
code-block:: bash +.. code-block:: console $ cmake --help This will list the generator's names at the end of the help text. Generator's names are case-sensitive. Example: -.. code-block:: bash +.. code-block:: console $ cmake -G "Visual Studio 9 2008" path/to/llvm/source/root @@ -110,14 +108,14 @@ Variables customize how the build will be generated. Options are boolean variables, with possible values ON/OFF. Options and variables are defined on the CMake command line like this: -.. code-block:: bash +.. code-block:: console $ cmake -DVARIABLE=value path/to/llvm/source You can set a variable after the initial CMake invocation for changing its value. You can also undefine a variable: -.. code-block:: bash +.. code-block:: console $ cmake -UVARIABLE path/to/llvm/source @@ -127,7 +125,7 @@ on the root of the build directory. Do not hand-edit it. Variables are listed here appending its type after a colon. It is correct to write the variable and the type on the CMake command line: -.. code-block:: bash +.. code-block:: console $ cmake -DVARIABLE:TYPE=value path/to/llvm/source @@ -206,7 +204,7 @@ LLVM-specific variables tests. **LLVM_APPEND_VC_REV**:BOOL - Append version control revision info (svn revision number or git revision id) + Append version control revision info (svn revision number or Git revision id) to LLVM version string (stored in the PACKAGE_VERSION macro). For this to work cmake must be invoked before the build. Defaults to OFF. @@ -280,7 +278,7 @@ Testing is performed when the *check* target is built. For instance, if you are using makefiles, execute this command while on the top level of your build directory: -.. code-block:: bash +.. code-block:: console $ make check @@ -355,13 +353,15 @@ an equivalent variant of snippet shown above: target_link_libraries(mycompiler ${REQ_LLVM_LIBRARIES}) +.. _cmake-out-of-source-pass: + Developing LLVM pass out of source ---------------------------------- It is possible to develop LLVM passes against installed LLVM. 
An example of project layout provided below: -.. code-block:: bash +.. code-block:: none / | diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index 5fab76ec1a44..75415ab9ccda 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -1,5 +1,3 @@ -.. _code_generator: - ========================================== The LLVM Target-Independent Code Generator ========================================== @@ -17,6 +15,8 @@ The LLVM Target-Independent Code Generator .partial { background-color: #F88017 } .yes { background-color: #0F0; } .yes:before { content: "Y" } + .na { background-color: #6666FF; } + .na:before { content: "N/A" } .. contents:: @@ -172,7 +172,7 @@ architecture. These target descriptions often have a large amount of common information (e.g., an ``add`` instruction is almost identical to a ``sub`` instruction). In order to allow the maximum amount of commonality to be factored out, the LLVM code generator uses the -`TableGen `_ tool to describe big chunks of the +:doc:`TableGen ` tool to describe big chunks of the target machine, which allows the use of domain-specific and target-specific abstractions to reduce the amount of repetition. @@ -230,7 +230,7 @@ for structures, the alignment requirements for various data types, the size of pointers in the target, and whether the target is little-endian or big-endian. -.. _targetlowering: +.. _TargetLowering: The ``TargetLowering`` class ---------------------------- @@ -250,6 +250,8 @@ operations. Among other things, this class indicates: * various high-level characteristics, like whether it is profitable to turn division by a constant into a multiplication sequence. +.. _TargetRegisterInfo: + The ``TargetRegisterInfo`` class -------------------------------- @@ -283,12 +285,10 @@ The ``TargetInstrInfo`` class ----------------------------- The ``TargetInstrInfo`` class is used to describe the machine instructions -supported by the target. 
It is essentially an array of ``TargetInstrDescriptor`` -objects, each of which describes one instruction the target -supports. Descriptors define things like the mnemonic for the opcode, the number -of operands, the list of implicit register uses and defs, whether the -instruction has certain target-independent properties (accesses memory, is -commutable, etc), and holds any target-specific flags. +supported by the target. Descriptions define things like the mnemonic for +the opcode, the number of operands, the list of implicit register uses and defs, +whether the instruction has certain target-independent properties (accesses +memory, is commutable, etc), and holds any target-specific flags. The ``TargetFrameInfo`` class ----------------------------- @@ -771,6 +771,8 @@ value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a SREM or UREM operation. The `legalize types`_ and `legalize operations`_ phases are responsible for turning an illegal DAG into a legal DAG. +.. _SelectionDAG-Process: + SelectionDAG Instruction Selection Process ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -874,7 +876,7 @@ found, the elements are converted to scalars ("scalarizing"). A target implementation tells the legalizer which types are supported (and which register class to use for them) by calling the ``addRegisterClass`` method in -its TargetLowering constructor. +its ``TargetLowering`` constructor. .. _legalize operations: .. _Legalizer: @@ -968,7 +970,8 @@ The ``FADDS`` instruction is a simple binary single-precision add instruction. To perform this pattern match, the PowerPC backend includes the following instruction definitions: -:: +.. 
code-block:: text + :emphasize-lines: 4-5,9 def FMADDS : AForm_1<59, 29, (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), @@ -980,10 +983,10 @@ instruction definitions: "fadds $FRT, $FRA, $FRB", [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; -The portion of the instruction definition in bold indicates the pattern used to -match the instruction. The DAG operators (like ``fmul``/``fadd``) are defined -in the ``include/llvm/Target/TargetSelectionDAG.td`` file. " ``F4RC``" is the -register class of the input and result values. +The highlighted portion of the instruction definitions indicates the pattern +used to match the instructions. The DAG operators (like ``fmul``/``fadd``) +are defined in the ``include/llvm/Target/TargetSelectionDAG.td`` file. +"``F4RC``" is the register class of the input and result values. The TableGen DAG instruction selector generator reads the instruction patterns in the ``.td`` file and automatically builds parts of the pattern matching code @@ -1035,6 +1038,24 @@ for your target. It has the following strengths: are used to manipulate the input immediate (in this case, take the high or low 16-bits of the immediate). +* When using the 'Pat' class to map a pattern to an instruction that has one + or more complex operands (like e.g. `X86 addressing mode`_), the pattern may + either specify the operand as a whole using a ``ComplexPattern``, or else it + may specify the components of the complex operand separately. The latter is + done e.g. 
for pre-increment instructions by the PowerPC back end: + + :: + + def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + + def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff), + (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>; + + Here, the pair of ``ptroff`` and ``ptrreg`` operands is matched onto the + complex operand ``dst`` of class ``memri`` in the ``STWU`` instruction. + * While the system does automate a lot, it still allows you to write custom C++ code to match special cases if there is something that is hard to express. @@ -1727,6 +1748,8 @@ This section of the document explains features or design decisions that are specific to the code generator for a particular target. First we start with a table that summarizes what features are supported by each target. +.. _target-feature-matrix: + Target Feature Matrix --------------------- @@ -1741,12 +1764,14 @@ the key: :raw-html:`` :raw-html:`` :raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1762,12 +1787,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1777,12 +1801,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1792,12 +1815,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1807,12 +1829,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` 
-:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1822,12 +1843,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1837,12 +1857,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1852,12 +1871,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1867,12 +1885,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1882,12 +1899,11 @@ Here is the table: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` :raw-html:`` @@ -1991,8 +2007,8 @@ Tail call optimization Tail call optimization, callee reusing the stack of the caller, is currently supported on x86/x86-64 and PowerPC. It is performed if: -* Caller and callee have the calling convention ``fastcc`` or ``cc 10`` (GHC - call convention). +* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC + calling convention) or ``cc 11`` (HiPE calling convention). * The call is a tail call - in tail position (ret immediately follows call and ret uses value of call or is void). @@ -2369,17 +2385,17 @@ Dynamic Allocation TODO - More to come. -The PTX backend ---------------- +The NVPTX backend +----------------- -The PTX code generator lives in the lib/Target/PTX directory. 
It is currently a -work-in-progress, but already supports most of the code generation functionality -needed to generate correct PTX kernels for CUDA devices. +The NVPTX code generator under lib/Target/NVPTX is an open-source version of +the NVIDIA NVPTX code generator for LLVM. It is contributed by NVIDIA and is +a port of the code generator used in the CUDA compiler (nvcc). It targets the +PTX 3.0/3.1 ISA and can target any compute capability greater than or equal to +2.0 (Fermi). -The code generator can target PTX 2.0+, and shader model 1.0+. The PTX ISA -Reference Manual is used as the primary source of ISA information, though an -effort is made to make the output of the code generator match the output of the -NVidia nvcc compiler, whenever possible. +This target is of production quality and should be completely compatible with +the official NVIDIA toolchain. Code Generator Options: @@ -2389,39 +2405,28 @@ Code Generator Options: :raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` -:raw-html:`` +:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` -:raw-html:`` +:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`` -:raw-html:`` -:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` :raw-html:`` :raw-html:`
UnknownNot ApplicableNo supportPartial SupportComplete Support
FeatureARMCellSPUHexagonMBlazeMSP430MipsPTXNVPTXPowerPCSparcX86
is generally reliable
assembly parser
disassembler
inline asm
jit*
.o file writing
tail calls
segmented stacks * Description
``double``If enabled, the map_f64_to_f32 directive is disabled in the PTX output, allowing native double-precision arithmeticsm_20Set shader model/compute capability to 2.0
``no-fma``Disable generation of Fused-Multiply Add instructions, which may be beneficial for some devicessm_21Set shader model/compute capability to 2.1
``smxy / computexy``Set shader model/compute capability to x.y, e.g. sm20 or compute13sm_30Set shader model/compute capability to 3.0
sm_35Set shader model/compute capability to 3.5
ptx30Target PTX 3.0
ptx31Target PTX 3.1
` -Working: - -* Arithmetic instruction selection (including combo FMA) - -* Bitwise instruction selection - -* Control-flow instruction selection - -* Function calls (only on SM 2.0+ and no return arguments) - -* Addresses spaces (0 = global, 1 = constant, 2 = local, 4 = shared) - -* Thread synchronization (bar.sync) - -* Special register reads ([N]TID, [N]CTAID, PMx, CLOCK, etc.) - -In Progress: - -* Robust call instruction selection - -* Stack frame allocation - -* Device-specific instruction scheduling optimizations diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst index 90835307b15c..4d66ad757435 100644 --- a/docs/CodingStandards.rst +++ b/docs/CodingStandards.rst @@ -1,5 +1,3 @@ -.. _coding_standards: - ===================== LLVM Coding Standards ===================== @@ -284,17 +282,10 @@ listed. We prefer these ``#include``\s to be listed in this order: #. Main Module Header #. Local/Private Headers -#. ``llvm/*`` -#. ``llvm/Analysis/*`` -#. ``llvm/Assembly/*`` -#. ``llvm/Bitcode/*`` -#. ``llvm/CodeGen/*`` -#. ... -#. ``llvm/Support/*`` -#. ``llvm/Config/*`` +#. ``llvm/...`` #. System ``#include``\s -and each category should be sorted by name. +and each category should be sorted lexicographically by the full path. The `Main Module Header`_ file applies to ``.cpp`` files which implement an interface defined by a ``.h`` file. This ``#include`` should always be included @@ -409,7 +400,8 @@ code. That said, LLVM does make extensive use of a hand-rolled form of RTTI that use templates like `isa<>, cast<>, and dyn_cast<> <ProgrammersManual.html#isa>`_. -This form of RTTI is opt-in and can be added to any class. It is also +This form of RTTI is opt-in and can be +:doc:`added to any class <HowToSetUpLLVMStyleRTTI>`. It is also substantially more efficient than ``dynamic_cast<>``. .. _static constructor: @@ -713,8 +705,8 @@ sort of thing is: ..
code-block:: c++ bool FoundFoo = false; - for (unsigned i = 0, e = BarList.size(); i != e; ++i) - if (BarList[i]->isFoo()) { + for (unsigned I = 0, E = BarList.size(); I != E; ++I) + if (BarList[I]->isFoo()) { FoundFoo = true; break; } @@ -732,8 +724,8 @@ code to be structured like this: /// \returns true if the specified list has an element that is a foo. static bool containsFoo(const std::vector<Bar*> &List) { - for (unsigned i = 0, e = List.size(); i != e; ++i) - if (List[i]->isFoo()) + for (unsigned I = 0, E = List.size(); I != E; ++I) + if (List[I]->isFoo()) return true; return false; } @@ -820,8 +812,8 @@ Here are some examples of good and bad names: Vehicle MakeVehicle(VehicleType Type) { VehicleMaker M; // Might be OK if having a short life-span. - Tire tmp1 = M.makeTire(); // Bad -- 'tmp1' provides no information. - Light headlight = M.makeLight("head"); // Good -- descriptive. + Tire Tmp1 = M.makeTire(); // Bad -- 'Tmp1' provides no information. + Light Headlight = M.makeLight("head"); // Good -- descriptive. ... } @@ -841,9 +833,9 @@ enforced, and hopefully what to do about it. Here is one complete example: .. code-block:: c++ - inline Value *getOperand(unsigned i) { - assert(i < Operands.size() && "getOperand() out of range!"); - return Operands[i]; + inline Value *getOperand(unsigned I) { + assert(I < Operands.size() && "getOperand() out of range!"); + return Operands[I]; } Here are more examples: @@ -1035,7 +1027,7 @@ form has two problems. First it may be less efficient than evaluating it at the start of the loop. In this case, the cost is probably minor --- a few extra loads every time through the loop. However, if the base expression is more complex, then the cost can rise quickly. I've seen loops where the end -expression was actually something like: "``SomeMap[x]->end()``" and map lookups +expression was actually something like: "``SomeMap[X]->end()``" and map lookups really aren't cheap.
By writing it in the second form consistently, you eliminate the issue entirely and don't even have to think about it. @@ -1096,6 +1088,34 @@ flushes the output stream. In other words, these are equivalent: Most of the time, you probably have no reason to flush the output stream, so it's better to use a literal ``'\n'``. +Don't use ``inline`` when defining a function in a class definition +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A member function defined in a class definition is implicitly inline, so don't +put the ``inline`` keyword in this case. + +Don't: + +.. code-block:: c++ + + class Foo { + public: + inline void bar() { + // ... + } + }; + +Do: + +.. code-block:: c++ + + class Foo { + public: + void bar() { + // ... + } + }; + Microscopic Details ------------------- @@ -1111,27 +1131,27 @@ macros. For example, this is good: .. code-block:: c++ - if (x) ... - for (i = 0; i != 100; ++i) ... - while (llvm_rocks) ... + if (X) ... + for (I = 0; I != 100; ++I) ... + while (LLVMRocks) ... somefunc(42); assert(3 != 4 && "laws of math are failing me"); - a = foo(42, 92) + bar(x); + A = foo(42, 92) + bar(X); and this is bad: .. code-block:: c++ - if(x) ... - for(i = 0; i != 100; ++i) ... - while(llvm_rocks) ... + if(X) ... + for(I = 0; I != 100; ++I) ... + while(LLVMRocks) ... somefunc (42); assert (3 != 4 && "laws of math are failing me"); - a = foo (42, 92) + bar (x); + A = foo (42, 92) + bar (X); The reason for doing this is not completely arbitrary. This style makes control flow operators stand out more, and makes expressions flow better. The function @@ -1139,11 +1159,11 @@ call operator binds very tightly as a postfix operator. Putting a space after a function name (as in the last example) makes it appear that the code might bind the arguments of the left-hand-side of a binary operator with the argument list of a function and the name of the right side. 
More specifically, it is easy to -misread the "``a``" example as: +misread the "``A``" example as: .. code-block:: c++ - a = foo ((42, 92) + bar) (x); + A = foo ((42, 92) + bar) (X); when skimming through the code. By avoiding a space in a function, we avoid this misinterpretation. @@ -1310,7 +1330,7 @@ namespace just because it was declared there. See Also ======== -A lot of these comments and recommendations have been culled for other sources. +A lot of these comments and recommendations have been culled from other sources. Two particularly important books for our work are: #. `Effective C++ diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index 1d7a462bd71f..fce63ba688cc 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -1,99 +1,79 @@ FileCheck - Flexible pattern matching file verifier =================================================== - SYNOPSIS -------- - -**FileCheck** *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*] - +:program:`FileCheck` *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*] DESCRIPTION ----------- +:program:`FileCheck` reads two files (one from standard input, and one +specified on the command line) and uses one to verify the other. This +behavior is particularly useful for the testsuite, which wants to verify that +the output of some tool (e.g. :program:`llc`) contains the expected information +(for example, a movsd from esp or whatever is interesting). This is similar to +using :program:`grep`, but it is optimized for matching multiple different +inputs in one file in a specific order. -**FileCheck** reads two files (one from standard input, and one specified on the -command line) and uses one to verify the other. This behavior is particularly -useful for the testsuite, which wants to verify that the output of some tool -(e.g. llc) contains the expected information (for example, a movsd from esp or -whatever is interesting). 
This is similar to using grep, but it is optimized -for matching multiple different inputs in one file in a specific order. - -The *match-filename* file specifies the file that contains the patterns to +The ``match-filename`` file specifies the file that contains the patterns to match. The file to verify is always read from standard input. - OPTIONS ------- - - -**-help** +.. option:: -help Print a summary of command line options. +.. option:: --check-prefix prefix + FileCheck searches the contents of ``match-filename`` for patterns to match. + By default, these patterns are prefixed with "``CHECK:``". If you'd like to + use a different prefix (e.g. because the same input file is checking multiple + different tools or options), the :option:`--check-prefix` argument allows you + to specify a specific prefix to match. - -**--check-prefix** *prefix* - - FileCheck searches the contents of *match-filename* for patterns to match. By - default, these patterns are prefixed with "CHECK:". If you'd like to use a - different prefix (e.g. because the same input file is checking multiple - different tool or options), the **--check-prefix** argument allows you to specify - a specific prefix to match. - - - -**--input-file** *filename* +.. option:: --input-file filename File to check (defaults to stdin). - -**--strict-whitespace** +.. option:: --strict-whitespace By default, FileCheck canonicalizes input horizontal whitespace (spaces and tabs) which causes it to ignore these differences (a space will match a tab). - The --strict-whitespace argument disables this behavior. + The :option:`--strict-whitespace` argument disables this behavior. End-of-line + sequences are canonicalized to UNIX-style '\n' in all modes. - - -**-version** +.. option:: -version Show the version number of this program. - - - EXIT STATUS ----------- - -If **FileCheck** verifies that the file matches the expected contents, it exits -with 0.
Otherwise, if not, or if an error occurs, it will exit with a non-zero -value. - +If :program:`FileCheck` verifies that the file matches the expected contents, +it exits with 0. Otherwise, if not, or if an error occurs, it will exit with a +non-zero value. TUTORIAL -------- - FileCheck is typically used from LLVM regression tests, being invoked on the RUN line of the test. A simple example of using FileCheck from a RUN line looks like this: - .. code-block:: llvm ; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s - -This syntax says to pipe the current file ("%s") into llvm-as, pipe that into -llc, then pipe the output of llc into FileCheck. This means that FileCheck will -be verifying its standard input (the llc output) against the filename argument -specified (the original .ll file specified by "%s"). To see how this works, -let's look at the rest of the .ll file (after the RUN line): - +This syntax says to pipe the current file ("``%s``") into ``llvm-as``, pipe +that into ``llc``, then pipe the output of ``llc`` into ``FileCheck``. This +means that FileCheck will be verifying its standard input (the llc output) +against the filename argument specified (the original ``.ll`` file specified by +"``%s``"). To see how this works, let's look at the rest of the ``.ll`` file +(after the RUN line): .. code-block:: llvm @@ -113,32 +93,30 @@ let's look at the rest of the .ll file (after the RUN line): ret void } +Here you can see some "``CHECK:``" lines specified in comments. Now you can +see how the file is piped into ``llvm-as``, then ``llc``, and the machine code +output is what we are verifying. FileCheck checks the machine code output to +verify that it matches what the "``CHECK:``" lines specify. -Here you can see some "CHECK:" lines specified in comments. Now you can see -how the file is piped into llvm-as, then llc, and the machine code output is -what we are verifying. 
FileCheck checks the machine code output to verify that -it matches what the "CHECK:" lines specify. - -The syntax of the CHECK: lines is very simple: they are fixed strings that +The syntax of the "``CHECK:``" lines is very simple: they are fixed strings that must occur in order. FileCheck defaults to ignoring horizontal whitespace differences (e.g. a space is allowed to match a tab) but otherwise, the contents -of the CHECK: line is required to match some thing in the test file exactly. +of the "``CHECK:``" line is required to match something in the test file exactly. One nice thing about FileCheck (compared to grep) is that it allows merging test cases together into logical groups. For example, because the test above -is checking for the "sub1:" and "inc4:" labels, it will not match unless there -is a "subl" in between those labels. If it existed somewhere else in the file, -that would not count: "grep subl" matches if subl exists anywhere in the -file. +is checking for the "``sub1:``" and "``inc4:``" labels, it will not match +unless there is a "``subl``" in between those labels. If it existed somewhere +else in the file, that would not count: "``grep subl``" matches if "``subl``" +exists anywhere in the file. The FileCheck -check-prefix option ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The FileCheck -check-prefix option allows multiple test configurations to be -driven from one .ll file. This is useful in many circumstances, for example, -testing different architectural variants with llc. Here's a simple example: - +The FileCheck :option:`-check-prefix` option allows multiple test +configurations to be driven from one ``.ll`` file. This is useful in many +circumstances, for example, testing different architectural variants with +:program:`llc`. Here's a simple example: .. code-block:: llvm @@ -157,21 +135,17 @@ testing different architectural variants with llc.
Here's a simple example: ; X64: pinsrd $1, %edi, %xmm0 } - In this case, we're testing that we get the expected code generation with both 32-bit and 64-bit code generation. - The "CHECK-NEXT:" directive ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Sometimes you want to match lines and would like to verify that matches happen on exactly consecutive lines with no other lines in between them. In -this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If -you specified a custom check prefix, just use "<PREFIX>-NEXT:". For -example, something like this works as you'd expect: - +this case, you can use "``CHECK:``" and "``CHECK-NEXT:``" directives to specify +this. If you specified a custom check prefix, just use "``<PREFIX>-NEXT:``". +For example, something like this works as you'd expect: .. code-block:: llvm @@ -193,22 +167,18 @@ example, something like this works as you'd expect: ; CHECK-NEXT: ret } - -CHECK-NEXT: directives reject the input unless there is exactly one newline -between it an the previous directive. A CHECK-NEXT cannot be the first -directive in a file. - +"``CHECK-NEXT:``" directives reject the input unless there is exactly one +newline between it and the previous directive. A "``CHECK-NEXT:``" cannot be +the first directive in a file. The "CHECK-NOT:" directive ~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The CHECK-NOT: directive is used to verify that a string doesn't occur +The "``CHECK-NOT:``" directive is used to verify that a string doesn't occur between two matches (or before the first match, or after the last match). For example, to verify that a load is removed by a transformation, a test like this can be used: - .. code-block:: llvm define i8 @coerce_offset0(i32 %V, i32* %P) { @@ -224,27 +194,22 @@ can be used: ; CHECK: ret i8 } - - FileCheck Pattern Matching Syntax ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The CHECK: and CHECK-NOT: directives both take a pattern to match. For most -uses of FileCheck, fixed string matching is perfectly sufficient.
For some -things, a more flexible form of matching is desired. To support this, FileCheck -allows you to specify regular expressions in matching strings, surrounded by -double braces: **{{yourregex}}**. Because we want to use fixed string -matching for a majority of what we do, FileCheck has been designed to support -mixing and matching fixed string matching with regular expressions. This allows -you to write things like this: - +The "``CHECK:``" and "``CHECK-NOT:``" directives both take a pattern to match. +For most uses of FileCheck, fixed string matching is perfectly sufficient. For +some things, a more flexible form of matching is desired. To support this, +FileCheck allows you to specify regular expressions in matching strings, +surrounded by double braces: ``{{yourregex}}``. Because we want to use fixed +string matching for a majority of what we do, FileCheck has been designed to +support mixing and matching fixed string matching with regular expressions. +This allows you to write things like this: .. code-block:: llvm ; CHECK: movhpd {{[0-9]+}}(%esp), {{%xmm[0-7]}} - In this case, any offset from the ESP register will be allowed, and any xmm register will be allowed. @@ -252,19 +217,16 @@ Because regular expressions are enclosed with double braces, they are visually distinct, and you don't need to use escape characters within the double braces like you would in C. In the rare case that you want to match double braces explicitly from the input, you can use something ugly like -**{{[{][{]}}** as your pattern. - +``{{[{][{]}}`` as your pattern. FileCheck Variables ~~~~~~~~~~~~~~~~~~~ - It is often useful to match a pattern and then verify that it occurs again later in the file. For codegen tests, this can be useful to allow any register, -but verify that that register is used consistently later. To do this, FileCheck -allows named variables to be defined and substituted into patterns. 
Here is a -simple example: - +but verify that that register is used consistently later. To do this, +:program:`FileCheck` allows named variables to be defined and substituted into +patterns. Here is a simple example: .. code-block:: llvm @@ -272,19 +234,46 @@ simple example: ; CHECK: notw [[REGISTER:%[a-z]+]] ; CHECK: andw {{.*}}[[REGISTER]] +The first check line matches a regex ``%[a-z]+`` and captures it into the +variable ``REGISTER``. The second line verifies that whatever is in +``REGISTER`` occurs later in the file after an "``andw``". :program:`FileCheck` +variable references are always contained in ``[[ ]]`` pairs, and their names can +be formed with the regex ``[a-zA-Z][a-zA-Z0-9]*``. If a colon follows the name, +then it is a definition of the variable; otherwise, it is a use. -The first check line matches a regex (**%[a-z]+**) and captures it into -the variable "REGISTER". The second line verifies that whatever is in REGISTER -occurs later in the file after an "andw". FileCheck variable references are -always contained in **[[ ]]** pairs, and their names can be formed with the -regex **[a-zA-Z][a-zA-Z0-9]***. If a colon follows the name, then it is a -definition of the variable; otherwise, it is a use. +:program:`FileCheck` variables can be defined multiple times, and uses always +get the latest value. Variables can also be used later on the same line they +were defined on. For example: + +.. code-block:: llvm + + ; CHECK: op [[REG:r[0-9]+]], [[REG]] + +This can be useful if you want the operands of ``op`` to be the same register, +and don't care exactly which register it is. + +FileCheck Expressions +~~~~~~~~~~~~~~~~~~~~~ + +Sometimes there's a need to verify output which refers to line numbers of the +match file, e.g. when testing compiler diagnostics.
This introduces a certain +fragility of the match file structure, as "``CHECK:``" lines contain absolute +line numbers in the same file, which have to be updated whenever line numbers +change due to text addition or deletion. + +To support this case, FileCheck allows using ``[[@LINE]]``, +``[[@LINE+]]``, ``[[@LINE-]]`` expressions in patterns. These +expressions expand to a number of the line where a pattern is located (with an +optional integer offset). + +This way match patterns can be put near the relevant test lines and include +relative line number references, for example: + +.. code-block:: c++ + + // CHECK: test.cpp:[[@LINE+4]]:6: error: expected ';' after top level declarator + // CHECK-NEXT: {{^int a}} + // CHECK-NEXT: {{^ \^}} + // CHECK-NEXT: {{^ ;}} + int a -FileCheck variables can be defined multiple times, and uses always get the -latest value. Note that variables are all read at the start of a "CHECK" line -and are all defined at the end. This means that if you have something like -"**CHECK: [[XYZ:.\\*]]x[[XYZ]]**", the check line will read the previous -value of the XYZ variable and define a new one after the match is performed. If -you need to do something like this you can probably take advantage of the fact -that FileCheck is not actually line-oriented when it matches, this allows you to -define two separate CHECK lines that match on the same line. diff --git a/docs/CommandGuide/bugpoint.rst b/docs/CommandGuide/bugpoint.rst index c1b3b6eca627..e4663e5d4477 100644 --- a/docs/CommandGuide/bugpoint.rst +++ b/docs/CommandGuide/bugpoint.rst @@ -1,19 +1,15 @@ bugpoint - automatic test case reduction tool ============================================= - SYNOPSIS -------- - **bugpoint** [*options*] [*input LLVM ll/bc files*] [*LLVM passes*] **--args** *program arguments* - DESCRIPTION ----------- - **bugpoint** narrows down the source of problems in LLVM tools and passes. 
It can be used to debug three types of failures: optimizer crashes, miscompilations by optimizers, or bad native code generation (including problems in the static @@ -22,82 +18,61 @@ For more information on the design and inner workings of **bugpoint**, as well a advice for using bugpoint, see *llvm/docs/Bugpoint.html* in the LLVM distribution. - OPTIONS ------- - - **--additional-so** *library* Load the dynamic shared object *library* into the test program whenever it is run. This is useful if you are debugging programs which depend on non-LLVM libraries (such as the X or curses libraries) to run. - - **--append-exit-code**\ =\ *{true,false}* Append the test programs exit code to the output file so that a change in exit code is considered a test failure. Defaults to false. - - **--args** *program args* - Pass all arguments specified after -args to the test program whenever it runs. - Note that if any of the *program args* start with a '-', you should use: + Pass all arguments specified after **--args** to the test program whenever it runs. + Note that if any of the *program args* start with a "``-``", you should use: - - .. code-block:: perl + .. code-block:: bash bugpoint [bugpoint args] --args -- [program args] - - The "--" right after the **--args** option tells **bugpoint** to consider any - options starting with ``-`` to be part of the **--args** option, not as options to - **bugpoint** itself. - - + The "``--``" right after the **--args** option tells **bugpoint** to consider + any options starting with "``-``" to be part of the **--args** option, not as + options to **bugpoint** itself. **--tool-args** *tool args* - Pass all arguments specified after --tool-args to the LLVM tool under test + Pass all arguments specified after **--tool-args** to the LLVM tool under test (**llc**, **lli**, etc.) whenever it runs. You should use this option in the following way: - - .. code-block:: perl + .. 
code-block:: bash bugpoint [bugpoint args] --tool-args -- [tool args] - - The "--" right after the **--tool-args** option tells **bugpoint** to consider any - options starting with ``-`` to be part of the **--tool-args** option, not as - options to **bugpoint** itself. (See **--args**, above.) - - + The "``--``" right after the **--tool-args** option tells **bugpoint** to + consider any options starting with "``-``" to be part of the **--tool-args** + option, not as options to **bugpoint** itself. (See **--args**, above.) **--safe-tool-args** *tool args* Pass all arguments specified after **--safe-tool-args** to the "safe" execution tool. - - **--gcc-tool-args** *gcc tool args* Pass all arguments specified after **--gcc-tool-args** to the invocation of **gcc**. - - **--opt-args** *opt args* Pass all arguments specified after **--opt-args** to the invocation of **opt**. - - **--disable-{dce,simplifycfg}** Do not run the specified passes to clean up and reduce the size of the test @@ -105,36 +80,26 @@ OPTIONS reduce test programs. If you're trying to find a bug in one of these passes, **bugpoint** may crash. - - **--enable-valgrind** Use valgrind to find faults in the optimization phase. This will allow bugpoint to find otherwise asymptomatic problems caused by memory mis-management. - - **-find-bugs** Continually randomize the specified passes and run them on the test program until a bug is found or the user kills **bugpoint**. - - **-help** Print a summary of command line options. - - **--input** *filename* Open *filename* and redirect the standard input of the test program, whenever it runs, to come from that file. - - **--load** *plugin* Load the dynamic object *plugin* into **bugpoint** itself. This object should @@ -143,20 +108,15 @@ OPTIONS optimizations, use the **-help** and **--load** options together; for example: - .. code-block:: perl + .. 
code-block:: bash bugpoint --load myNewPass.so -help - - - **--mlimit** *megabytes* Specifies an upper limit on memory usage of the optimization and codegen. Set to zero to disable the limit. - - **--output** *filename* Whenever the test program produces output on its standard output stream, it @@ -164,14 +124,10 @@ OPTIONS do not use this option, **bugpoint** will attempt to generate a reference output by compiling the program with the "safe" backend and running it. - - **--profile-info-file** *filename* Profile file loaded by **--profile-loader**. - - **--run-{int,jit,llc,custom}** Whenever the test program is compiled, **bugpoint** should generate code for it @@ -179,8 +135,6 @@ OPTIONS interpreter, the JIT compiler, the static native code compiler, or a custom command (see **--exec-command**) respectively. - - **--safe-{llc,custom}** When debugging a code generator, **bugpoint** should use the specified code @@ -192,16 +146,12 @@ OPTIONS respectively. The interpreter and the JIT backends cannot currently be used as the "safe" backends. - - **--exec-command** *command* This option defines the command to use with the **--run-custom** and **--safe-custom** options to execute the bitcode testcase. This can be useful for cross-compilation. - - **--compile-command** *command* This option defines the command to use with the **--compile-custom** @@ -210,38 +160,28 @@ OPTIONS generate a reduced unit test, you may add CHECK directives to the testcase and pass the name of an executable compile-command script in this form: - .. code-block:: sh #!/bin/sh llc "$@" not FileCheck [bugpoint input file].ll < bugpoint-test-program.s - This script will "fail" as long as FileCheck passes. So the result will be the minimum bitcode that passes FileCheck. - - **--safe-path** *path* This option defines the path to the command to execute with the **--safe-{int,jit,llc,custom}** option. - - - EXIT STATUS ----------- - If **bugpoint** succeeds in finding a problem, it will exit with 0. 
Otherwise, if an error occurs, it will exit with a non-zero value. - SEE ALSO -------- - opt|opt diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst index 73a4835dd7a1..ac8a944a2e76 100644 --- a/docs/CommandGuide/index.rst +++ b/docs/CommandGuide/index.rst @@ -1,5 +1,3 @@ -.. _commands: - LLVM Command Guide ------------------ @@ -30,6 +28,7 @@ Basic Commands llvm-diff llvm-cov llvm-stress + llvm-symbolizer Debugging Tools ~~~~~~~~~~~~~~~ diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst index 9e96cd2a4bfd..40c7646260a4 100644 --- a/docs/CommandGuide/lit.rst +++ b/docs/CommandGuide/lit.rst @@ -1,351 +1,278 @@ lit - LLVM Integrated Tester ============================ - SYNOPSIS -------- - -**lit** [*options*] [*tests*] - +:program:`lit` [*options*] [*tests*] DESCRIPTION ----------- +:program:`lit` is a portable tool for executing LLVM and Clang style test +suites, summarizing their results, and providing indication of failures. +:program:`lit` is designed to be a lightweight testing tool with as simple a +user interface as possible. -**lit** is a portable tool for executing LLVM and Clang style test suites, -summarizing their results, and providing indication of failures. **lit** is -designed to be a lightweight testing tool with as simple a user interface as -possible. - -**lit** should be run with one or more *tests* to run specified on the command -line. Tests can be either individual test files or directories to search for -tests (see "TEST DISCOVERY"). +:program:`lit` should be run with one or more *tests* to run specified on the +command line. Tests can be either individual test files or directories to +search for tests (see :ref:`test-discovery`). Each specified test will be executed (potentially in parallel) and once all -tests have been run **lit** will print summary information on the number of tests -which passed or failed (see "TEST STATUS RESULTS"). 
The **lit** program will -execute with a non-zero exit code if any tests fail. +tests have been run :program:`lit` will print summary information on the number +of tests which passed or failed (see :ref:`test-status-results`). The +:program:`lit` program will execute with a non-zero exit code if any tests +fail. -By default **lit** will use a succinct progress display and will only print -summary information for test failures. See "OUTPUT OPTIONS" for options -controlling the **lit** progress display and output. +By default :program:`lit` will use a succinct progress display and will only +print summary information for test failures. See :ref:`output-options` for +options controlling the :program:`lit` progress display and output. -**lit** also includes a number of options for controlling how tests are executed -(specific features may depend on the particular test format). See "EXECUTION -OPTIONS" for more information. +:program:`lit` also includes a number of options for controlling how tests are +executed (specific features may depend on the particular test format). See +:ref:`execution-options` for more information. -Finally, **lit** also supports additional options for only running a subset of -the options specified on the command line, see "SELECTION OPTIONS" for -more information. - -Users interested in the **lit** architecture or designing a **lit** testing -implementation should see "LIT INFRASTRUCTURE" +Finally, :program:`lit` also supports additional options for only running a +subset of the tests specified on the command line; see +:ref:`selection-options` for more information. +Users interested in the :program:`lit` architecture or designing a +:program:`lit` testing implementation should see :ref:`lit-infrastructure`. GENERAL OPTIONS --------------- +.. option:: -h, --help + Show the :program:`lit` help message. -**-h**, **--help** +.. option:: -j N, --threads=N - Show the **lit** help message. + Run ``N`` tests in parallel.
By default, this is automatically chosen to + match the number of detected available CPUs. +.. option:: --config-prefix=NAME + Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for + test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`. -**-j** *N*, **--threads**\ =\ *N* +.. option:: --param NAME, --param NAME=VALUE - Run *N* tests in parallel. By default, this is automatically chosen to match - the number of detected available CPUs. - - - -**--config-prefix**\ =\ *NAME* - - Search for *NAME.cfg* and *NAME.site.cfg* when searching for test suites, - instead of *lit.cfg* and *lit.site.cfg*. - - - -**--param** *NAME*, **--param** *NAME*\ =\ *VALUE* - - Add a user defined parameter *NAME* with the given *VALUE* (or the empty - string if not given). The meaning and use of these parameters is test suite + Add a user defined parameter ``NAME`` with the given ``VALUE`` (or the empty + string if not given). The meaning and use of these parameters is test suite dependent. - - +.. _output-options: OUTPUT OPTIONS -------------- - - -**-q**, **--quiet** +.. option:: -q, --quiet Suppress any output except for test failures. - - -**-s**, **--succinct** +.. option:: -s, --succinct Show less output, for example don't show information on tests that pass. - - -**-v**, **--verbose** +.. option:: -v, --verbose Show more information on test failures, for example the entire test output instead of just the test result. - - -**--no-progress-bar** +.. option:: --no-progress-bar Do not use curses based progress bar. - - +.. _execution-options: EXECUTION OPTIONS ----------------- +.. option:: --path=PATH + Specify an additional ``PATH`` to use when searching for executables in tests. -**--path**\ =\ *PATH* +.. option:: --vg - Specify an addition *PATH* to use when searching for executables in tests. + Run individual tests under valgrind (using the memcheck tool). 
The + ``--error-exitcode`` argument for valgrind is used so that valgrind failures + will cause the program to exit with a non-zero status. + When this option is enabled, :program:`lit` will also automatically provide a + "``valgrind``" feature that can be used to conditionally disable (or expect + failure in) certain tests. +.. option:: --vg-arg=ARG -**--vg** + When :option:`--vg` is used, specify an additional argument to pass to + :program:`valgrind` itself. - Run individual tests under valgrind (using the memcheck tool). The - *--error-exitcode* argument for valgrind is used so that valgrind failures will - cause the program to exit with a non-zero status. +.. option:: --vg-leak - When this option is enabled, **lit** will also automatically provide a - "valgrind" feature that can be used to conditionally disable (or expect failure - in) certain tests. - - - -**--vg-arg**\ =\ *ARG* - - When *--vg* is used, specify an additional argument to pass to valgrind itself. - - - -**--vg-leak** - - When *--vg* is used, enable memory leak checks. When this option is enabled, - **lit** will also automatically provide a "vg_leak" feature that can be - used to conditionally disable (or expect failure in) certain tests. - - - - -**--time-tests** - - Track the wall time individual tests take to execute and includes the results in - the summary output. This is useful for determining which tests in a test suite - take the most time to execute. Note that this option is most useful with *-j - 1*. + When :option:`--vg` is used, enable memory leak checks. When this option is + enabled, :program:`lit` will also automatically provide a "``vg_leak``" + feature that can be used to conditionally disable (or expect failure in) + certain tests. +.. option:: --time-tests + Track the wall time individual tests take to execute and includes the results + in the summary output. This is useful for determining which tests in a test + suite take the most time to execute. 
Note that this option is most useful + with ``-j 1``. +.. _selection-options: SELECTION OPTIONS ----------------- +.. option:: --max-tests=N + Run at most ``N`` tests and then terminate. -**--max-tests**\ =\ *N* +.. option:: --max-time=N - Run at most *N* tests and then terminate. + Spend at most ``N`` seconds (approximately) running tests and then terminate. - - -**--max-time**\ =\ *N* - - Spend at most *N* seconds (approximately) running tests and then terminate. - - - -**--shuffle** +.. option:: --shuffle Run the tests in a random order. - - - ADDITIONAL OPTIONS ------------------ +.. option:: --debug + Run :program:`lit` in debug mode, for debugging configuration issues and + :program:`lit` itself. -**--debug** - - Run **lit** in debug mode, for debugging configuration issues and **lit** itself. - - - -**--show-suites** +.. option:: --show-suites List the discovered test suites as part of the standard output. +.. option:: --repeat=N - -**--no-tcl-as-sh** - - Run Tcl scripts internally (instead of converting to shell scripts). - - - -**--repeat**\ =\ *N* - - Run each test *N* times. Currently this is primarily useful for timing tests, - other results are not collated in any reasonable fashion. - - - + Run each test ``N`` times. Currently this is primarily useful for timing + tests, other results are not collated in any reasonable fashion. EXIT STATUS ----------- - -**lit** will exit with an exit code of 1 if there are any FAIL or XPASS -results. Otherwise, it will exit with the status 0. Other exit codes are used +:program:`lit` will exit with an exit code of 1 if there are any FAIL or XPASS +results. Otherwise, it will exit with the status 0. Other exit codes are used for non-test related failures (for example a user error or an internal program error). +.. _test-discovery: TEST DISCOVERY -------------- +The inputs passed to :program:`lit` can be either individual tests, or entire +directories or hierarchies of tests to run. 
When :program:`lit` starts up, the +first thing it does is convert the inputs into a complete list of tests to run +as part of *test discovery*. -The inputs passed to **lit** can be either individual tests, or entire -directories or hierarchies of tests to run. When **lit** starts up, the first -thing it does is convert the inputs into a complete list of tests to run as part -of *test discovery*. +In the :program:`lit` model, every test must exist inside some *test suite*. +:program:`lit` resolves the inputs specified on the command line to test suites +by searching upwards from the input path until it finds a :file:`lit.cfg` or +:file:`lit.site.cfg` file. These files serve as both a marker of test suites +and as configuration files which :program:`lit` loads in order to understand +how to find and run the tests inside the test suite. -In the **lit** model, every test must exist inside some *test suite*. **lit** -resolves the inputs specified on the command line to test suites by searching -upwards from the input path until it finds a *lit.cfg* or *lit.site.cfg* -file. These files serve as both a marker of test suites and as configuration -files which **lit** loads in order to understand how to find and run the tests -inside the test suite. - -Once **lit** has mapped the inputs into test suites it traverses the list of -inputs adding tests for individual files and recursively searching for tests in -directories. +Once :program:`lit` has mapped the inputs into test suites it traverses the +list of inputs adding tests for individual files and recursively searching for +tests in directories. This behavior makes it easy to specify a subset of tests to run, while still allowing the test suite configuration to control exactly how tests are -interpreted. In addition, **lit** always identifies tests by the test suite they -are in, and their relative path inside the test suite. 
For appropriately -configured projects, this allows **lit** to provide convenient and flexible -support for out-of-tree builds. +interpreted. In addition, :program:`lit` always identifies tests by the test +suite they are in, and their relative path inside the test suite. For +appropriately configured projects, this allows :program:`lit` to provide +convenient and flexible support for out-of-tree builds. +.. _test-status-results: TEST STATUS RESULTS ------------------- - Each test ultimately produces one of the following six results: - **PASS** The test succeeded. - - **XFAIL** - The test failed, but that is expected. This is used for test formats which allow + The test failed, but that is expected. This is used for test formats which allow specifying that a test does not currently work, but wish to leave it in the test suite. - - **XPASS** - The test succeeded, but it was expected to fail. This is used for tests which + The test succeeded, but it was expected to fail. This is used for tests which were specified as expected to fail, but are now succeeding (generally because the feature they test was broken and has been fixed). - - **FAIL** The test failed. - - **UNRESOLVED** - The test result could not be determined. For example, this occurs when the test + The test result could not be determined. For example, this occurs when the test could not be run, the test itself is invalid, or the test was interrupted. - - **UNSUPPORTED** - The test is not supported in this environment. This is used by test formats + The test is not supported in this environment. This is used by test formats which can report unsupported tests. - - Depending on the test format tests may produce additional information about -their status (generally only for failures). See the Output|"OUTPUT OPTIONS" +their status (generally only for failures). See the :ref:`output-options` section for more information. +.. 
_lit-infrastructure: LIT INFRASTRUCTURE ------------------ +This section describes the :program:`lit` testing architecture for users interested in +creating a new :program:`lit` testing implementation, or extending an existing one. -This section describes the **lit** testing architecture for users interested in -creating a new **lit** testing implementation, or extending an existing one. - -**lit** proper is primarily an infrastructure for discovering and running +:program:`lit` proper is primarily an infrastructure for discovering and running arbitrary tests, and to expose a single convenient interface to these -tests. **lit** itself doesn't know how to run tests, rather this logic is +tests. :program:`lit` itself doesn't know how to run tests, rather this logic is defined by *test suites*. TEST SUITES ~~~~~~~~~~~ - -As described in "TEST DISCOVERY", tests are always located inside a *test -suite*. Test suites serve to define the format of the tests they contain, the +As described in :ref:`test-discovery`, tests are always located inside a *test +suite*. Test suites serve to define the format of the tests they contain, the logic for finding those tests, and any additional information to run the tests. -**lit** identifies test suites as directories containing *lit.cfg* or -*lit.site.cfg* files (see also **--config-prefix**). Test suites are initially -discovered by recursively searching up the directory hierarchy for all the input -files passed on the command line. You can use **--show-suites** to display the -discovered test suites at startup. +:program:`lit` identifies test suites as directories containing ``lit.cfg`` or +``lit.site.cfg`` files (see also :option:`--config-prefix`). Test suites are +initially discovered by recursively searching up the directory hierarchy for +all the input files passed on the command line. You can use +:option:`--show-suites` to display the discovered test suites at startup. 
-Once a test suite is discovered, its config file is loaded. Config files -themselves are Python modules which will be executed. When the config file is +Once a test suite is discovered, its config file is loaded. Config files +themselves are Python modules which will be executed. When the config file is executed, two important global variables are predefined: - **lit** The global **lit** configuration object (a *LitConfig* instance), which defines the builtin test formats, global configuration parameters, and other helper routines for implementing test configurations. - - **config** This is the config object (a *TestingConfig* instance) for the test suite, - which the config file is expected to populate. The following variables are also + which the config file is expected to populate. The following variables are also available on the *config* object, some of which must be set by the config and others are optional or predefined: @@ -353,135 +280,132 @@ executed, two important global variables are predefined: diagnostics. **test_format** *[required]* The test format object which will be used to - discover and run tests in the test suite. Generally this will be a builtin test + discover and run tests in the test suite. Generally this will be a builtin test format available from the *lit.formats* module. - **test_src_root** The filesystem path to the test suite root. For out-of-dir + **test_src_root** The filesystem path to the test suite root. For out-of-dir builds this is the directory that will be scanned for tests. **test_exec_root** For out-of-dir builds, the path to the test suite root inside - the object directory. This is where tests will be run and temporary output files + the object directory. This is where tests will be run and temporary output files placed. **environment** A dictionary representing the environment to use when executing tests in the suite. 
**suffixes** For **lit** test formats which scan directories for tests, this - variable is a list of suffixes to identify test files. Used by: *ShTest*, - *TclTest*. + variable is a list of suffixes to identify test files. Used by: *ShTest*. **substitutions** For **lit** test formats which substitute variables into a test - script, the list of substitutions to perform. Used by: *ShTest*, *TclTest*. + script, the list of substitutions to perform. Used by: *ShTest*. **unsupported** Mark an unsupported directory, all tests within it will be - reported as unsupported. Used by: *ShTest*, *TclTest*. + reported as unsupported. Used by: *ShTest*. **parent** The parent configuration, this is the config object for the directory containing the test suite, or None. - **root** The root configuration. This is the top-most **lit** configuration in + **root** The root configuration. This is the top-most :program:`lit` configuration in the project. **on_clone** The config is actually cloned for every subdirectory inside a test - suite, to allow local configuration on a per-directory basis. The *on_clone* + suite, to allow local configuration on a per-directory basis. The *on_clone* variable can be set to a Python function which will be called whenever a - configuration is cloned (for a subdirectory). The function should takes three + configuration is cloned (for a subdirectory). The function should take three arguments: (1) the parent configuration, (2) the new configuration (which the *on_clone* function will generally modify), and (3) the test path to the new directory being scanned. - - - TEST DISCOVERY ~~~~~~~~~~~~~~ - -Once test suites are located, **lit** recursively traverses the source directory -(following *test_src_root*) looking for tests. When **lit** enters a -sub-directory, it first checks to see if a nested test suite is defined in that -directory.
If so, it loads that test suite recursively, otherwise it -instantiates a local test config for the directory (see "LOCAL CONFIGURATION -FILES"). +Once test suites are located, :program:`lit` recursively traverses the source +directory (following *test_src_root*) looking for tests. When :program:`lit` +enters a sub-directory, it first checks to see if a nested test suite is +defined in that directory. If so, it loads that test suite recursively, +otherwise it instantiates a local test config for the directory (see +:ref:`local-configuration-files`). Tests are identified by the test suite they are contained within, and the -relative path inside that suite. Note that the relative path may not refer to an -actual file on disk; some test formats (such as *GoogleTest*) define "virtual -tests" which have a path that contains both the path to the actual test file and -a subpath to identify the virtual test. +relative path inside that suite. Note that the relative path may not refer to +an actual file on disk; some test formats (such as *GoogleTest*) define +"virtual tests" which have a path that contains both the path to the actual +test file and a subpath to identify the virtual test. +.. _local-configuration-files: LOCAL CONFIGURATION FILES ~~~~~~~~~~~~~~~~~~~~~~~~~ - -When **lit** loads a subdirectory in a test suite, it instantiates a local test -configuration by cloning the configuration for the parent direction -- the root -of this configuration chain will always be a test suite. Once the test -configuration is cloned **lit** checks for a *lit.local.cfg* file in the -subdirectory. If present, this file will be loaded and can be used to specialize -the configuration for each individual directory. This facility can be used to -define subdirectories of optional tests, or to change other configuration -parameters -- for example, to change the test format, or the suffixes which -identify test files. 
- +When :program:`lit` loads a subdirectory in a test suite, it instantiates a +local test configuration by cloning the configuration for the parent directory +--- the root of this configuration chain will always be a test suite. Once the +test configuration is cloned :program:`lit` checks for a *lit.local.cfg* file +in the subdirectory. If present, this file will be loaded and can be used to +specialize the configuration for each individual directory. This facility can +be used to define subdirectories of optional tests, or to change other +configuration parameters --- for example, to change the test format, or the +suffixes which identify test files. TEST RUN OUTPUT FORMAT ~~~~~~~~~~~~~~~~~~~~~~ - -The b<lit> output for a test run conforms to the following schema, in both short -and verbose modes (although in short mode no PASS lines will be shown). This -schema has been chosen to be relatively easy to reliably parse by a machine (for -example in buildbot log scraping), and for other tools to generate. +The :program:`lit` output for a test run conforms to the following schema, in +both short and verbose modes (although in short mode no PASS lines will be +shown). This schema has been chosen to be relatively easy to reliably parse by +a machine (for example in buildbot log scraping), and for other tools to +generate. Each test result is expected to appear on a line that matches: -<result code>: <test name> (<progress info>) +.. code-block:: none -where <result-code> is a standard test result such as PASS, FAIL, XFAIL, XPASS, -UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and + <result code>: <test name> (<progress info>) + +where ``<result-code>`` is a standard test result such as PASS, FAIL, XFAIL, +XPASS, UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and REGRESSED are also allowed. -The <test name> field can consist of an arbitrary string containing no newline. +The ``<test name>`` field can consist of an arbitrary string containing no +newline.
-The <progress info> field can be used to report progress information such as -(1/300) or can be empty, but even when empty the parentheses are required. +The ``<progress info>`` field can be used to report progress information such +as (1/300) or can be empty, but even when empty the parentheses are required. Each test result may include additional (multiline) log information in the -following format. +following format: -<log delineator> TEST '(<test name>)' <trailing delineator> -... log message ... -<log delineator> +.. code-block:: none -where <test name> should be the name of a preceding reported test, <log -delineator> is a string of '\*' characters *at least* four characters long (the -recommended length is 20), and <trailing delineator> is an arbitrary (unparsed) -string. + <log delineator> TEST '(<test name>)' <trailing delineator> + ... log message ... + <log delineator> + +where ``<test name>`` should be the name of a preceding reported test, ``<log +delineator>`` is a string of "*" characters *at least* four characters long +(the recommended length is 20), and ``<trailing delineator>`` is an arbitrary +(unparsed) string. The following is an example of a test run output which consists of four tests A, -B, C, and D, and a log message for the failing test C:: +B, C, and D, and a log message for the failing test C: + +.. code-block:: none PASS: A (1 of 4) PASS: B (2 of 4) FAIL: C (3 of 4) - \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* TEST 'C' FAILED \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + ******************** TEST 'C' FAILED ******************** Test 'C' failed as a result of exit code 1. - \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + ******************** PASS: D (4 of 4) - LIT EXAMPLE TESTS ~~~~~~~~~~~~~~~~~ - -The **lit** distribution contains several example implementations of test suites -in the *ExampleTests* directory. - +The :program:`lit` distribution contains several example implementations of +test suites in the *ExampleTests* directory.
SEE ALSO -------- - valgrind(1) diff --git a/docs/CommandGuide/llc.rst b/docs/CommandGuide/llc.rst index 6f1c486c3f42..70354b0343e5 100644 --- a/docs/CommandGuide/llc.rst +++ b/docs/CommandGuide/llc.rst @@ -1,251 +1,187 @@ llc - LLVM static compiler ========================== - SYNOPSIS -------- - -**llc** [*options*] [*filename*] - +:program:`llc` [*options*] [*filename*] DESCRIPTION ----------- - -The **llc** command compiles LLVM source inputs into assembly language for a -specified architecture. The assembly language output can then be passed through -a native assembler and linker to generate a native executable. +The :program:`llc` command compiles LLVM source inputs into assembly language +for a specified architecture. The assembly language output can then be passed +through a native assembler and linker to generate a native executable. The choice of architecture for the output assembly code is automatically -determined from the input file, unless the **-march** option is used to override -the default. - +determined from the input file, unless the :option:`-march` option is used to +override the default. OPTIONS ------- +If ``filename`` is "``-``" or omitted, :program:`llc` reads from standard input. +Otherwise, it will read from ``filename``. Inputs can be in either the LLVM assembly +language format (``.ll``) or the LLVM bitcode format (``.bc``). -If *filename* is - or omitted, **llc** reads from standard input. Otherwise, it -will from *filename*. Inputs can be in either the LLVM assembly language -format (.ll) or the LLVM bitcode format (.bc). +If the :option:`-o` option is omitted, then :program:`llc` will send its output +to standard output if the input is from standard input. If the :option:`-o` +option specifies "``-``", then the output will also be sent to standard output. -If the **-o** option is omitted, then **llc** will send its output to standard -output if the input is from standard input.
If the **-o** option specifies -, -then the output will also be sent to standard output. +If no :option:`-o` option is specified and an input file other than "``-``" is +specified, then :program:`llc` creates the output filename by taking the input +filename, removing any existing ``.bc`` extension, and adding a ``.s`` suffix. -If no **-o** option is specified and an input file other than - is specified, -then **llc** creates the output filename by taking the input filename, -removing any existing *.bc* extension, and adding a *.s* suffix. - -Other **llc** options are as follows: +Other :program:`llc` options are described below. End-user Options ~~~~~~~~~~~~~~~~ - - -**-help** +.. option:: -help Print a summary of command line options. +.. option:: -O=uint + Generate code at different optimization levels. These correspond to the + ``-O0``, ``-O1``, ``-O2``, and ``-O3`` optimization levels used by + :program:`llvm-gcc` and :program:`clang`. -**-O**\ =\ *uint* - - Generate code at different optimization levels. These correspond to the *-O0*, - *-O1*, *-O2*, and *-O3* optimization levels used by **llvm-gcc** and - **clang**. - - - -**-mtriple**\ =\ *target triple* +.. option:: -mtriple=<target triple> Override the target triple specified in the input file with the specified string. - - -**-march**\ =\ *arch* +.. option:: -march=<arch> Specify the architecture for which to generate assembly, overriding the target - encoded in the input file. See the output of **llc -help** for a list of + encoded in the input file. See the output of ``llc -help`` for a list of valid architectures. By default this is inferred from the target triple or autodetected to the current architecture. - - -**-mcpu**\ =\ *cpuname* +.. option:: -mcpu=<cpuname> Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture. For a list of available CPUs, use: - **llvm-as < /dev/null | llc -march=xyz -mcpu=help** + ..
code-block:: none + llvm-as < /dev/null | llc -march=xyz -mcpu=help -**-mattr**\ =\ *a1,+a2,-a3,...* +.. option:: -mattr=a1,+a2,-a3,... Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU. For a list of available attributes, use: - **llvm-as < /dev/null | llc -march=xyz -mattr=help** + .. code-block:: none + llvm-as < /dev/null | llc -march=xyz -mattr=help -**--disable-fp-elim** +.. option:: --disable-fp-elim Disable frame pointer elimination optimization. - - -**--disable-excess-fp-precision** +.. option:: --disable-excess-fp-precision Disable optimizations that may produce excess precision for floating point. Note that this option can dramatically slow down code on some systems (e.g. X86). - - -**--enable-no-infs-fp-math** +.. option:: --enable-no-infs-fp-math Enable optimizations that assume no Inf values. - - -**--enable-no-nans-fp-math** +.. option:: --enable-no-nans-fp-math Enable optimizations that assume no NAN values. - - -**--enable-unsafe-fp-math** +.. option:: --enable-unsafe-fp-math Enable optimizations that make unsafe assumptions about IEEE math (e.g. that addition is associative) or may not work for all input ranges. These optimizations allow the code generator to make use of some instructions which - would otherwise not be usable (such as fsin on X86). + would otherwise not be usable (such as ``fsin`` on X86). +.. option:: --enable-correct-eh-support + Instruct the **lowerinvoke** pass to insert code for correct exception + handling support. This is expensive and is by default omitted for efficiency. -**--enable-correct-eh-support** - - Instruct the **lowerinvoke** pass to insert code for correct exception handling - support. This is expensive and is by default omitted for efficiency. - - - -**--stats** +.. option:: --stats Print statistics recorded by code-generation passes. - - -**--time-passes** +.. 
option:: --time-passes Record the amount of time needed for each pass and print a report to standard error. +.. option:: --load=<dso_path> - -**--load**\ =\ *dso_path* - - Dynamically load *dso_path* (a path to a dynamically shared object) that - implements an LLVM target. This will permit the target name to be used with the - **-march** option so that code can be generated for that target. - - - + Dynamically load ``dso_path`` (a path to a dynamically shared object) that + implements an LLVM target. This will permit the target name to be used with + the :option:`-march` option so that code can be generated for that target. Tuning/Configuration Options ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - -**--print-machineinstrs** +.. option:: --print-machineinstrs Print generated machine code between compilation phases (useful for debugging). +.. option:: --regalloc=<allocator> - -**--regalloc**\ =\ *allocator* - - Specify the register allocator to use. The default *allocator* is *local*. + Specify the register allocator to use. The default ``allocator`` is *local*. Valid register allocators are: - *simple* Very simple "always spill" register allocator - - *local* Local register allocator - - *linearscan* Linear scan global register allocator - - *iterativescan* Iterative scan global register allocator - - - - -**--spiller**\ =\ *spiller* +.. option:: --spiller=<spiller> Specify the spiller to use for register allocators that support it. Currently - this option is used only by the linear scan register allocator. The default - *spiller* is *local*. Valid spillers are: - + this option is used only by the linear scan register allocator. The default + ``spiller`` is *local*. Valid spillers are: *simple* Simple spiller - - *local* Local spiller - - - - - Intel IA-32-specific Options ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +..
option:: --x86-asm-syntax=[att|intel] - -**--x86-asm-syntax=att|intel** - - Specify whether to emit assembly code in AT&T syntax (the default) or intel + Specify whether to emit assembly code in AT&T syntax (the default) or Intel syntax. - - - - EXIT STATUS ----------- - -If **llc** succeeds, it will exit with 0. Otherwise, if an error occurs, -it will exit with a non-zero value. - +If :program:`llc` succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value. SEE ALSO -------- +lli -lli|lli diff --git a/docs/CommandGuide/lli.rst b/docs/CommandGuide/lli.rst index 7cc128444dac..a9aaf310e1f3 100644 --- a/docs/CommandGuide/lli.rst +++ b/docs/CommandGuide/lli.rst @@ -50,7 +50,7 @@ GENERAL OPTIONS -**-load**\ =\ *puginfilename* +**-load**\ =\ *pluginfilename* Causes **lli** to load the plugin (shared object) named *pluginfilename* and use it for optimization. diff --git a/docs/CommandGuide/llvm-bcanalyzer.rst b/docs/CommandGuide/llvm-bcanalyzer.rst index f1e4eac1be50..7254088ec946 100644 --- a/docs/CommandGuide/llvm-bcanalyzer.rst +++ b/docs/CommandGuide/llvm-bcanalyzer.rst @@ -1,424 +1,305 @@ llvm-bcanalyzer - LLVM bitcode analyzer ======================================= - SYNOPSIS -------- - -**llvm-bcanalyzer** [*options*] [*filename*] - +:program:`llvm-bcanalyzer` [*options*] [*filename*] DESCRIPTION ----------- +The :program:`llvm-bcanalyzer` command is a small utility for analyzing bitcode +files. The tool reads a bitcode file (such as generated with the +:program:`llvm-as` tool) and produces a statistical report on the contents of +the bitcode file. The tool can also dump a low level but human readable +version of the bitcode file. This tool is probably not of much interest or +utility except for those working directly with the bitcode file format. Most +LLVM users can just ignore this tool. -The **llvm-bcanalyzer** command is a small utility for analyzing bitcode files. 
-The tool reads a bitcode file (such as generated with the **llvm-as** tool) and -produces a statistical report on the contents of the bitcode file. The tool -can also dump a low level but human readable version of the bitcode file. -This tool is probably not of much interest or utility except for those working -directly with the bitcode file format. Most LLVM users can just ignore -this tool. - -If *filename* is omitted or is ``-``, then **llvm-bcanalyzer** reads its input -from standard input. This is useful for combining the tool into a pipeline. -Output is written to the standard output. - +If *filename* is omitted or is ``-``, then :program:`llvm-bcanalyzer` reads its +input from standard input. This is useful for combining the tool into a +pipeline. Output is written to the standard output. OPTIONS ------- +.. program:: llvm-bcanalyzer +.. option:: -nodetails -**-nodetails** + Causes :program:`llvm-bcanalyzer` to abbreviate its output by writing out only + a module level summary. The details for individual functions are not + displayed. - Causes **llvm-bcanalyzer** to abbreviate its output by writing out only a module - level summary. The details for individual functions are not displayed. +.. option:: -dump + Causes :program:`llvm-bcanalyzer` to dump the bitcode in a human readable + format. This format is significantly different from LLVM assembly and + provides details about the encoding of the bitcode file. +.. option:: -verify -**-dump** - - Causes **llvm-bcanalyzer** to dump the bitcode in a human readable format. This - format is significantly different from LLVM assembly and provides details about - the encoding of the bitcode file. - - - -**-verify** - - Causes **llvm-bcanalyzer** to verify the module produced by reading the - bitcode. This ensures that the statistics generated are based on a consistent + Causes :program:`llvm-bcanalyzer` to verify the module produced by reading the + bitcode. 
This ensures that the statistics generated are based on a consistent module. - - -**-help** +.. option:: -help Print a summary of command line options. - - - EXIT STATUS ----------- - -If **llvm-bcanalyzer** succeeds, it will exit with 0. Otherwise, if an error -occurs, it will exit with a non-zero value, usually 1. - +If :program:`llvm-bcanalyzer` succeeds, it will exit with 0. Otherwise, if an +error occurs, it will exit with a non-zero value, usually 1. SUMMARY OUTPUT DEFINITIONS -------------------------- - -The following items are always printed by llvm-bcanalyzer. They comprize the +The following items are always printed by llvm-bcanalyzer. They comprise the summary output. - **Bitcode Analysis Of Module** This just provides the name of the module for which bitcode analysis is being generated. - - **Bitcode Version Number** The bitcode version (not LLVM version) of the file read by the analyzer. - - **File Size** The size, in bytes, of the entire bitcode file. - - **Module Bytes** - The size, in bytes, of the module block. Percentage is relative to File Size. - - + The size, in bytes, of the module block. Percentage is relative to File Size. **Function Bytes** - The size, in bytes, of all the function blocks. Percentage is relative to File + The size, in bytes, of all the function blocks. Percentage is relative to File Size. - - **Global Types Bytes** - The size, in bytes, of the Global Types Pool. Percentage is relative to File - Size. This is the size of the definitions of all types in the bitcode file. - - + The size, in bytes, of the Global Types Pool. Percentage is relative to File + Size. This is the size of the definitions of all types in the bitcode file. **Constant Pool Bytes** The size, in bytes, of the Constant Pool Blocks Percentage is relative to File Size. - - **Module Globals Bytes** Ths size, in bytes, of the Global Variable Definitions and their initializers. Percentage is relative to File Size.
- - **Instruction List Bytes** The size, in bytes, of all the instruction lists in all the functions. - Percentage is relative to File Size. Note that this value is also included in + Percentage is relative to File Size. Note that this value is also included in the Function Bytes. - - **Compaction Table Bytes** The size, in bytes, of all the compaction tables in all the functions. - Percentage is relative to File Size. Note that this value is also included in + Percentage is relative to File Size. Note that this value is also included in the Function Bytes. - - **Symbol Table Bytes** - The size, in bytes, of all the symbol tables in all the functions. Percentage is - relative to File Size. Note that this value is also included in the Function + The size, in bytes, of all the symbol tables in all the functions. Percentage is + relative to File Size. Note that this value is also included in the Function Bytes. - - **Dependent Libraries Bytes** - The size, in bytes, of the list of dependent libraries in the module. Percentage - is relative to File Size. Note that this value is also included in the Module + The size, in bytes, of the list of dependent libraries in the module. Percentage + is relative to File Size. Note that this value is also included in the Module Global Bytes. - - **Number Of Bitcode Blocks** The total number of blocks of any kind in the bitcode file. - - **Number Of Functions** The total number of function definitions in the bitcode file. - - **Number Of Types** The total number of types defined in the Global Types Pool. - - **Number Of Constants** The total number of constants (of any type) defined in the Constant Pool. - - **Number Of Basic Blocks** The total number of basic blocks defined in all functions in the bitcode file. - - **Number Of Instructions** The total number of instructions defined in all functions in the bitcode file. 
- - **Number Of Long Instructions** The total number of long instructions defined in all functions in the bitcode - file. Long instructions are those taking greater than 4 bytes. Typically long + file. Long instructions are those taking greater than 4 bytes. Typically long instructions are GetElementPtr with several indices, PHI nodes, and calls to functions with large numbers of arguments. - - **Number Of Operands** The total number of operands used in all instructions in the bitcode file. - - **Number Of Compaction Tables** The total number of compaction tables in all functions in the bitcode file. - - **Number Of Symbol Tables** The total number of symbol tables in all functions in the bitcode file. - - **Number Of Dependent Libs** The total number of dependent libraries found in the bitcode file. - - **Total Instruction Size** The total size of the instructions in all functions in the bitcode file. - - **Average Instruction Size** The average number of bytes per instruction across all functions in the bitcode - file. This value is computed by dividing Total Instruction Size by Number Of + file. This value is computed by dividing Total Instruction Size by Number Of Instructions. - - **Maximum Type Slot Number** - The maximum value used for a type's slot number. Larger slot number values take + The maximum value used for a type's slot number. Larger slot number values take more bytes to encode. - - **Maximum Value Slot Number** - The maximum value used for a value's slot number. Larger slot number values take + The maximum value used for a value's slot number. Larger slot number values take more bytes to encode. - - **Bytes Per Value** - The average size of a Value definition (of any type). This is computed by + The average size of a Value definition (of any type). This is computed by dividing File Size by the total number of values of any type. - - **Bytes Per Global** The average size of a global definition (constants and global variables). 
- - **Bytes Per Function** - The average number of bytes per function definition. This is computed by + The average number of bytes per function definition. This is computed by dividing Function Bytes by Number Of Functions. - - **# of VBR 32-bit Integers** The total number of 32-bit integers encoded using the Variable Bit Rate encoding scheme. - - **# of VBR 64-bit Integers** The total number of 64-bit integers encoded using the Variable Bit Rate encoding scheme. - - **# of VBR Compressed Bytes** The total number of bytes consumed by the 32-bit and 64-bit integers that use the Variable Bit Rate encoding scheme. - - **# of VBR Expanded Bytes** The total number of bytes that would have been consumed by the 32-bit and 64-bit integers had they not been compressed with the Variable Bit Rage encoding scheme. - - **Bytes Saved With VBR** The total number of bytes saved by using the Variable Bit Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes. - - - DETAILED OUTPUT DEFINITIONS --------------------------- - The following definitions occur only if the -nodetails option was not given. The detailed output provides additional information on a per-function basis. - **Type** The type signature of the function. - - **Byte Size** The total number of bytes in the function's block. - - **Basic Blocks** The number of basic blocks defined by the function. - - **Instructions** The number of instructions defined by the function. - - **Long Instructions** The number of instructions using the long instruction format in the function. - - **Operands** The number of operands used by all instructions in the function. - - **Instruction Size** The number of bytes consumed by instructions in the function. - - **Average Instruction Size** - The average number of bytes consumed by the instructions in the function. This - value is computed by dividing Instruction Size by Instructions. - - + The average number of bytes consumed by the instructions in the function. 
+ This value is computed by dividing Instruction Size by Instructions. **Bytes Per Instruction** - The average number of bytes used by the function per instruction. This value is - computed by dividing Byte Size by Instructions. Note that this is not the same - as Average Instruction Size. It computes a number relative to the total function - size not just the size of the instruction list. - - + The average number of bytes used by the function per instruction. This value + is computed by dividing Byte Size by Instructions. Note that this is not the + same as Average Instruction Size. It computes a number relative to the total + function size not just the size of the instruction list. **Number of VBR 32-bit Integers** The total number of 32-bit integers found in this function (for any use). - - **Number of VBR 64-bit Integers** The total number of 64-bit integers found in this function (for any use). - - **Number of VBR Compressed Bytes** The total number of bytes in this function consumed by the 32-bit and 64-bit integers that use the Variable Bit Rate encoding scheme. - - **Number of VBR Expanded Bytes** The total number of bytes in this function that would have been consumed by the 32-bit and 64-bit integers had they not been compressed with the Variable Bit Rate encoding scheme. - - **Bytes Saved With VBR** The total number of bytes saved in this function by using the Variable Bit - Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes. - - - + Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes. 
SEE ALSO -------- +:doc:`/CommandGuide/llvm-dis`, :doc:`/BitCodeFormat` -llvm-dis|llvm-dis, `http://llvm.org/docs/BitCodeFormat.html `_ diff --git a/docs/CommandGuide/llvm-cov.rst b/docs/CommandGuide/llvm-cov.rst index 09275f6af714..524f24087f21 100644 --- a/docs/CommandGuide/llvm-cov.rst +++ b/docs/CommandGuide/llvm-cov.rst @@ -1,51 +1,39 @@ llvm-cov - emit coverage information ==================================== - SYNOPSIS -------- - -**llvm-cov** [-gcno=filename] [-gcda=filename] [dump] - +:program:`llvm-cov` [-gcno=filename] [-gcda=filename] [-dump] DESCRIPTION ----------- - -The experimental **llvm-cov** tool reads in description file generated by compiler -and coverage data file generated by instrumented program. This program assumes -that the description and data file uses same format as gcov files. - +The experimental :program:`llvm-cov` tool reads in a description file generated +by the compiler and a coverage data file generated by the instrumented program. This +program assumes that the description and data files use the same format as gcov +files. OPTIONS ------- +.. option:: -gcno=filename + This option selects input description file generated by compiler while + instrumenting program. -**-gcno=filename]** - - This option selects input description file generated by compiler while instrumenting - program. - - - -**-gcda=filename]** +.. option:: -gcda=filename This option selects coverage data file generated by instrumented compiler. +.. option:: -dump - -**-dump** - - This options enables output dump that is suitable for a developer to help debug - **llvm-cov** itself. - - - + This option enables output dump that is suitable for a developer to help + debug :program:`llvm-cov` itself. EXIT STATUS ----------- +:program:`llvm-cov` returns 1 if it cannot read input files. Otherwise, it +exits with zero. -**llvm-cov** returns 1 if it cannot read input files. Otherwise, it exits with zero.
diff --git a/docs/CommandGuide/llvm-link.rst b/docs/CommandGuide/llvm-link.rst index 63019d7cca78..3bcfa68c2599 100644 --- a/docs/CommandGuide/llvm-link.rst +++ b/docs/CommandGuide/llvm-link.rst @@ -1,96 +1,56 @@ -llvm-link - LLVM linker -======================= - +llvm-link - LLVM bitcode linker +=============================== SYNOPSIS -------- - -**llvm-link** [*options*] *filename ...* - +:program:`llvm-link` [*options*] *filename ...* DESCRIPTION ----------- - -**llvm-link** takes several LLVM bitcode files and links them together into a -single LLVM bitcode file. It writes the output file to standard output, unless -the **-o** option is used to specify a filename. - -**llvm-link** attempts to load the input files from the current directory. If -that fails, it looks for each file in each of the directories specified by the -**-L** options on the command line. The library search paths are global; each -one is searched for every input file if necessary. The directories are searched -in the order they were specified on the command line. - +:program:`llvm-link` takes several LLVM bitcode files and links them together +into a single LLVM bitcode file. It writes the output file to standard output, +unless the :option:`-o` option is used to specify a filename. OPTIONS ------- +.. option:: -f + Enable binary output on terminals. Normally, :program:`llvm-link` will refuse + to write raw bitcode output if the output stream is a terminal. With this + option, :program:`llvm-link` will write raw bitcode regardless of the output + device. -**-L** *directory* +.. option:: -o filename - Add the specified *directory* to the library search path. When looking for - libraries, **llvm-link** will look in path name for libraries. This option can be - specified multiple times; **llvm-link** will search inside these directories in - the order in which they were specified on the command line. + Specify the output file name. 
If ``filename`` is "``-``", then + :program:`llvm-link` will write its output to standard output. - - -**-f** - - Enable binary output on terminals. Normally, **llvm-link** will refuse to - write raw bitcode output if the output stream is a terminal. With this option, - **llvm-link** will write raw bitcode regardless of the output device. - - - -**-o** *filename* - - Specify the output file name. If *filename* is ``-``, then **llvm-link** will - write its output to standard output. - - - -**-S** +.. option:: -S Write output in LLVM intermediate language (instead of bitcode). +.. option:: -d + If specified, :program:`llvm-link` prints a human-readable version of the + output bitcode file to standard error. -**-d** - - If specified, **llvm-link** prints a human-readable version of the output - bitcode file to standard error. - - - -**-help** +.. option:: -help Print a summary of command line options. +.. option:: -v - -**-v** - - Verbose mode. Print information about what **llvm-link** is doing. This - typically includes a message for each bitcode file linked in and for each + Verbose mode. Print information about what :program:`llvm-link` is doing. + This typically includes a message for each bitcode file linked in and for each library found. - - - EXIT STATUS ----------- - -If **llvm-link** succeeds, it will exit with 0. Otherwise, if an error +If :program:`llvm-link` succeeds, it will exit with 0. Otherwise, if an error occurs, it will exit with a non-zero value. 
-SEE ALSO --------- - - -gccld|gccld diff --git a/docs/CommandGuide/llvm-stress.rst b/docs/CommandGuide/llvm-stress.rst index 44aa32c7557f..fb006f562b12 100644 --- a/docs/CommandGuide/llvm-stress.rst +++ b/docs/CommandGuide/llvm-stress.rst @@ -1,48 +1,34 @@ llvm-stress - generate random .ll files ======================================= - SYNOPSIS -------- - -**llvm-stress** [-size=filesize] [-seed=initialseed] [-o=outfile] - +:program:`llvm-stress` [-size=filesize] [-seed=initialseed] [-o=outfile] DESCRIPTION ----------- - -The **llvm-stress** tool is used to generate random .ll files that can be used to -test different components of LLVM. - +The :program:`llvm-stress` tool is used to generate random ``.ll`` files that +can be used to test different components of LLVM. OPTIONS ------- - - -**-o** *filename* +.. option:: -o filename Specify the output filename. +.. option:: -size size + Specify the size of the generated ``.ll`` file. -**-size** *size* - - Specify the size of the generated .ll file. - - - -**-seed** *seed* +.. option:: -seed seed Specify the seed to be used for the randomly generated instructions. - - - EXIT STATUS ----------- +:program:`llvm-stress` returns 0. -**llvm-stress** returns 0. diff --git a/docs/CommandGuide/llvm-symbolizer.rst b/docs/CommandGuide/llvm-symbolizer.rst new file mode 100644 index 000000000000..73babb1e5c55 --- /dev/null +++ b/docs/CommandGuide/llvm-symbolizer.rst @@ -0,0 +1,65 @@ +llvm-symbolizer - convert addresses into source code locations +============================================================== + +SYNOPSIS +-------- + +:program:`llvm-symbolizer` [options] + +DESCRIPTION +----------- + +:program:`llvm-symbolizer` reads object file names and addresses from standard +input and prints corresponding source code locations to standard output. This +program uses debug info sections and symbol table in the object files. + +EXAMPLE +-------- + +.. 
code-block:: console + + $ cat addr.txt + a.out 0x4004f4 + /tmp/b.out 0x400528 + /tmp/c.so 0x710 + $ llvm-symbolizer < addr.txt + main + /tmp/a.cc:4 + + f(int, int) + /tmp/b.cc:11 + + h_inlined_into_g + /tmp/header.h:2 + g_inlined_into_f + /tmp/header.h:7 + f_inlined_into_main + /tmp/source.cc:3 + main + /tmp/source.cc:8 + +OPTIONS +------- + +.. option:: -functions + + Print function names as well as source file/line locations. Defaults to true. + +.. option:: -use-symbol-table + + Prefer function names stored in symbol table to function names + in debug info sections. Defaults to true. + +.. option:: -demangle + + Print demangled function names. Defaults to true. + +.. option:: -inlining + + If a source code location is in an inlined function, prints all the + inlined frames. Defaults to true. + +EXIT STATUS +----------- + +:program:`llvm-symbolizer` returns 0. Other exit codes imply an internal program error. diff --git a/docs/CommandGuide/opt.rst b/docs/CommandGuide/opt.rst index 72f19034c9ed..179c297c2209 100644 --- a/docs/CommandGuide/opt.rst +++ b/docs/CommandGuide/opt.rst @@ -1,183 +1,143 @@ opt - LLVM optimizer ==================== - SYNOPSIS -------- - -**opt** [*options*] [*filename*] - +:program:`opt` [*options*] [*filename*] DESCRIPTION ----------- +The :program:`opt` command is the modular LLVM optimizer and analyzer. It +takes LLVM source files as input, runs the specified optimizations or analyses +on it, and then outputs the optimized file or the analysis results. The +function of :program:`opt` depends on whether the :option:`-analyze` option is +given. -The **opt** command is the modular LLVM optimizer and analyzer. It takes LLVM -source files as input, runs the specified optimizations or analyses on it, and then -outputs the optimized file or the analysis results. The function of -**opt** depends on whether the **-analyze** option is given. - -When **-analyze** is specified, **opt** performs various analyses of the input -source.
It will usually print the results on standard output, but in a few -cases, it will print output to standard error or generate a file with the -analysis output, which is usually done when the output is meant for another +When :option:`-analyze` is specified, :program:`opt` performs various analyses +of the input source. It will usually print the results on standard output, but +in a few cases, it will print output to standard error or generate a file with +the analysis output, which is usually done when the output is meant for another program. -While **-analyze** is *not* given, **opt** attempts to produce an optimized -output file. The optimizations available via **opt** depend upon what -libraries were linked into it as well as any additional libraries that have -been loaded with the **-load** option. Use the **-help** option to determine -what optimizations you can use. +While :option:`-analyze` is *not* given, :program:`opt` attempts to produce an +optimized output file. The optimizations available via :program:`opt` depend +upon what libraries were linked into it as well as any additional libraries +that have been loaded with the :option:`-load` option. Use the :option:`-help` +option to determine what optimizations you can use. -If *filename* is omitted from the command line or is *-*, **opt** reads its -input from standard input. Inputs can be in either the LLVM assembly language -format (.ll) or the LLVM bitcode format (.bc). - -If an output filename is not specified with the **-o** option, **opt** -writes its output to the standard output. +If ``filename`` is omitted from the command line or is "``-``", :program:`opt` +reads its input from standard input. Inputs can be in either the LLVM assembly +language format (``.ll``) or the LLVM bitcode format (``.bc``). +If an output filename is not specified with the :option:`-o` option, +:program:`opt` writes its output to the standard output. OPTIONS ------- +.. option:: -f + Enable binary output on terminals. 
Normally, :program:`opt` will refuse to + write raw bitcode output if the output stream is a terminal. With this option, + :program:`opt` will write raw bitcode regardless of the output device. -**-f** - - Enable binary output on terminals. Normally, **opt** will refuse to - write raw bitcode output if the output stream is a terminal. With this option, - **opt** will write raw bitcode regardless of the output device. - - - -**-help** +.. option:: -help Print a summary of command line options. - - -**-o** *filename* +.. option:: -o <filename> Specify the output filename. - - -**-S** +.. option:: -S Write output in LLVM intermediate language (instead of bitcode). +.. option:: -{passname} + :program:`opt` provides the ability to run any of LLVM's optimization or + analysis passes in any order. The :option:`-help` option lists all the passes + available. The order in which the options occur on the command line is the + order in which they are executed (within pass constraints). -**-{passname}** - - **opt** provides the ability to run any of LLVM's optimization or analysis passes - in any order. The **-help** option lists all the passes available. The order in - which the options occur on the command line are the order in which they are - executed (within pass constraints). - - - -**-std-compile-opts** +.. option:: -std-compile-opts This is short hand for a standard list of *compile time optimization* passes. - This is typically used to optimize the output from the llvm-gcc front end. It - might be useful for other front end compilers as well. To discover the full set - of options available, use the following command: - + This is typically used to optimize the output from the llvm-gcc front end. It + might be useful for other front end compilers as well. To discover the full + set of options available, use the following command: .. code-block:: sh llvm-as < /dev/null | opt -std-compile-opts -disable-output -debug-pass=Arguments +..
option:: -disable-inlining + This option is only meaningful when :option:`-std-compile-opts` is given. It + simply removes the inlining pass from the standard list. +.. option:: -disable-opt -**-disable-inlining** + This option is only meaningful when :option:`-std-compile-opts` is given. It + disables most, but not all, of the :option:`-std-compile-opts`. The ones that + remain are :option:`-verify`, :option:`-lower-setjmp`, and + :option:`-funcresolve`. - This option is only meaningful when **-std-compile-opts** is given. It simply - removes the inlining pass from the standard list. - - - -**-disable-opt** - - This option is only meaningful when **-std-compile-opts** is given. It disables - most, but not all, of the **-std-compile-opts**. The ones that remain are - **-verify**, **-lower-setjmp**, and **-funcresolve**. - - - -**-strip-debug** +.. option:: -strip-debug This option causes opt to strip debug information from the module before - applying other optimizations. It is essentially the same as **-strip** but it - ensures that stripping of debug information is done first. + applying other optimizations. It is essentially the same as :option:`-strip` + but it ensures that stripping of debug information is done first. +.. option:: -verify-each + This option causes opt to add a verify pass after every pass otherwise + specified on the command line (including :option:`-verify`). This is useful + for cases where it is suspected that a pass is creating an invalid module but + it is not clear which pass is doing it. The combination of + :option:`-std-compile-opts` and :option:`-verify-each` can quickly track down + this kind of problem. -**-verify-each** +.. option:: -profile-info-file <filename> - This option causes opt to add a verify pass after every pass otherwise specified - on the command line (including **-verify**). This is useful for cases where it - is suspected that a pass is creating an invalid module but it is not clear which - pass is doing it.
The combination of **-std-compile-opts** and **-verify-each** - can quickly track down this kind of problem. + Specify the name of the file loaded by the ``-profile-loader`` option. - - -**-profile-info-file** *filename* - - Specify the name of the file loaded by the -profile-loader option. - - - -**-stats** +.. option:: -stats Print statistics. - - -**-time-passes** +.. option:: -time-passes Record the amount of time needed for each pass and print it to standard error. +.. option:: -debug + If this is a debug build, this option will enable debug printouts from passes + which use the ``DEBUG()`` macro. See the `LLVM Programmer's Manual + <../ProgrammersManual.html>`_, section ``#DEBUG`` for more information. -**-debug** - - If this is a debug build, this option will enable debug printouts - from passes which use the *DEBUG()* macro. See the **LLVM Programmer's - Manual**, section *#DEBUG* for more information. - - - -**-load**\ =\ *plugin* - - Load the dynamic object *plugin*. This object should register new optimization - or analysis passes. Once loaded, the object will add new command line options to - enable various optimizations or analyses. To see the new complete list of - optimizations, use the **-help** and **-load** options together. For example: +.. option:: -load=<plugin> + Load the dynamic object ``plugin``. This object should register new + optimization or analysis passes. Once loaded, the object will add new command + line options to enable various optimizations or analyses. To see the new + complete list of optimizations, use the :option:`-help` and :option:`-load` + options together. For example: .. code-block:: sh opt -load=plugin.so -help - - - -**-p** +.. option:: -p Print module after each transformation. - - - EXIT STATUS ----------- - -If **opt** succeeds, it will exit with 0. Otherwise, if an error +If :program:`opt` succeeds, it will exit with 0. Otherwise, if an error occurs, it will exit with a non-zero value.
+ diff --git a/docs/CommandGuide/tblgen.rst b/docs/CommandGuide/tblgen.rst index 2d191676d9f0..1858ee447d07 100644 --- a/docs/CommandGuide/tblgen.rst +++ b/docs/CommandGuide/tblgen.rst @@ -1,186 +1,129 @@ tblgen - Target Description To C++ Code Generator ================================================= - SYNOPSIS -------- - -**tblgen** [*options*] [*filename*] - +:program:`tblgen` [*options*] [*filename*] DESCRIPTION ----------- +:program:`tblgen` translates from target description (``.td``) files into C++ +code that can be included in the definition of an LLVM target library. Most +users of LLVM will not need to use this program. It is only for assisting with +writing an LLVM target backend. -**tblgen** translates from target description (.td) files into C++ code that can -be included in the definition of an LLVM target library. Most users of LLVM will -not need to use this program. It is only for assisting with writing an LLVM -target backend. - -The input and output of **tblgen** is beyond the scope of this short -introduction. Please see the *CodeGeneration* page in the LLVM documentation. - -The *filename* argument specifies the name of a Target Description (.td) file -to read as input. +The input and output of :program:`tblgen` is beyond the scope of this short +introduction. Please see :doc:`../TableGenFundamentals`. +The *filename* argument specifies the name of a Target Description (``.td``) +file to read as input. OPTIONS ------- - - -**-help** +.. option:: -help Print a summary of command line options. +.. option:: -o filename + Specify the output file name. If ``filename`` is ``-``, then + :program:`tblgen` sends its output to standard output. -**-o** *filename* +.. option:: -I directory - Specify the output file name. If *filename* is ``-``, then **tblgen** - sends its output to standard output. + Specify where to find other target description files for inclusion. 
The + ``directory`` value should be a full or partial path to a directory that + contains target description files. +.. option:: -asmparsernum N + Make -gen-asm-parser emit assembly parser number ``N``. -**-I** *directory* +.. option:: -asmwriternum N - Specify where to find other target description files for inclusion. The - *directory* value should be a full or partial path to a directory that contains - target description files. + Make -gen-asm-writer emit assembly writer number ``N``. - - -**-asmparsernum** *N* - - Make -gen-asm-parser emit assembly writer number *N*. - - - -**-asmwriternum** *N* - - Make -gen-asm-writer emit assembly writer number *N*. - - - -**-class** *class Name* +.. option:: -class className Print the enumeration list for this class. - - -**-print-records** +.. option:: -print-records Print all records to standard output (default). - - -**-print-enums** +.. option:: -print-enums Print enumeration values for a class - - -**-print-sets** +.. option:: -print-sets Print expanded sets for testing DAG exprs. - - -**-gen-emitter** +.. option:: -gen-emitter Generate machine code emitter. - - -**-gen-register-info** +.. option:: -gen-register-info Generate registers and register classes info. - - -**-gen-instr-info** +.. option:: -gen-instr-info Generate instruction descriptions. - - -**-gen-asm-writer** +.. option:: -gen-asm-writer Generate the assembly writer. - - -**-gen-disassembler** +.. option:: -gen-disassembler Generate disassembler. - - -**-gen-pseudo-lowering** +.. option:: -gen-pseudo-lowering Generate pseudo instruction lowering. - - -**-gen-dag-isel** +.. option:: -gen-dag-isel Generate a DAG (Directed Acycle Graph) instruction selector. - - -**-gen-asm-matcher** +.. option:: -gen-asm-matcher Generate assembly instruction matcher. - - -**-gen-dfa-packetizer** +.. option:: -gen-dfa-packetizer Generate DFA Packetizer for VLIW targets. - - -**-gen-fast-isel** +.. option:: -gen-fast-isel Generate a "fast" instruction selector.
- - -**-gen-subtarget** +.. option:: -gen-subtarget Generate subtarget enumerations. - - -**-gen-intrinsic** +.. option:: -gen-intrinsic Generate intrinsic information. - - -**-gen-tgt-intrinsic** +.. option:: -gen-tgt-intrinsic Generate target intrinsic information. - - -**-gen-enhanced-disassembly-info** +.. option:: -gen-enhanced-disassembly-info Generate enhanced disassembly info. - - -**-version** +.. option:: -version Show the version number of this program. - - - EXIT STATUS ----------- - -If **tblgen** succeeds, it will exit with 0. Otherwise, if an error +If :program:`tblgen` succeeds, it will exit with 0. Otherwise, if an error occurs, it will exit with a non-zero value. diff --git a/docs/CommandLine.rst b/docs/CommandLine.rst index 302f5a4cf591..073958b16bad 100644 --- a/docs/CommandLine.rst +++ b/docs/CommandLine.rst @@ -1,5 +1,3 @@ -.. _commandline: - ============================== CommandLine 2.0 Library Manual ============================== @@ -68,9 +66,7 @@ CommandLine library to have the following features: This document will hopefully let you jump in and start using CommandLine in your utility quickly and painlessly. Additionally it should be a simple reference -manual to figure out how stuff works. If it is failing in some area (or you -want an extension to the library), nag the author, `Chris -Lattner `_. +manual to figure out how stuff works. Quick Start Guide ================= diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index e41f5f9eecea..681777c12d0b 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -1,5 +1,3 @@ -.. _compiler_writer_info: - ======================================================== Architecture & Platform Information for Compiler Writers ======================================================== @@ -12,8 +10,6 @@ Architecture & Platform Information for Compiler Writers This document is a work-in-progress. Additions and clarifications are welcome. 
- Compiled by `Misha Brukman `_. - Hardware ======== @@ -24,6 +20,11 @@ ARM * `ABI `_ +AArch64 +------- + +* `ARMv8 Instruction Set Overview `_ + Itanium (ia64) -------------- @@ -40,19 +41,15 @@ PowerPC IBM - Official manuals and docs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* `PowerPC Architecture Book `_ +* `Power Instruction Set Architecture, Versions 2.03 through 2.06 (authentication required, free sign-up) `_ - * Book I: `PowerPC User Instruction Set Architecture `_ +* `PowerPC Compiler Writer's Guide `_ - * Book II: `PowerPC Virtual Environment Architecture `_ +* `Intro to PowerPC Architecture `_ - * Book III: `PowerPC Operating Environment Architecture `_ +* `PowerPC Processor Manuals (embedded) `_ -* `PowerPC Compiler Writer's Guide `_ - -* `PowerPC Processor Manuals `_ - -* `Intro to PowerPC Architecture `_ +* `Various IBM specifications and white papers `_ * `IBM AIX/5L for POWER Assembly Reference `_ @@ -81,13 +78,13 @@ AMD - Official manuals and docs Intel - Official manuals and docs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* `IA-32 manuals `_ +* `Intel 64 and IA-32 manuals `_ * `Intel Itanium documentation `_ Other x86-specific information ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* `Calling conventions for different C++ compilers and operating systems `_ +* `Calling conventions for different C++ compilers and operating systems `_ Other relevant lists -------------------- @@ -101,6 +98,8 @@ Linux ----- * `PowerPC 64-bit ELF ABI Supplement `_ +* `Procedure Call Standard for the AArch64 Architecture `_ +* `ELF for the ARM 64-bit Architecture (AArch64) `_ OS X ---- @@ -108,6 +107,12 @@ OS X * `Mach-O Runtime Architecture `_ * `Notes on Mach-O ABI `_ +NVPTX +===== + +* `CUDA Documentation `_ includes the PTX + ISA and Driver API documentation + Miscellaneous Resources ======================= diff --git a/docs/DebuggingJITedCode.rst b/docs/DebuggingJITedCode.rst index eeb2f7787dae..d6101d510034 100644 --- a/docs/DebuggingJITedCode.rst +++ b/docs/DebuggingJITedCode.rst @@ -1,11 
+1,7 @@ -.. _debugging-jited-code: - ============================== Debugging JIT-ed Code With GDB ============================== -.. sectionauthor:: Reid Kleckner and Eli Bendersky - Background ========== diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst index e35e72955640..43bdc8598531 100644 --- a/docs/DeveloperPolicy.rst +++ b/docs/DeveloperPolicy.rst @@ -1,5 +1,3 @@ -.. _developer_policy: - ===================== LLVM Developer Policy ===================== @@ -26,8 +24,8 @@ This policy is also designed to accomplish the following objectives: #. Keep the top of Subversion trees as stable as possible. -#. Establish awareness of the project's `copyright, license, and patent - policies`_ with contributors to the project. +#. Establish awareness of the project's :ref:`copyright, license, and patent + policies ` with contributors to the project. This policy is aimed at frequent contributors to LLVM. People interested in contributing one-off patches can do so in an informal way by sending them to the @@ -180,8 +178,8 @@ Developers are required to create test cases for any bugs fixed and any new features added. Some tips for getting your testcase approved: * All feature and regression test cases are added to the ``llvm/test`` - directory. The appropriate sub-directory should be selected (see the `Testing - Guide `_ for details). + directory. The appropriate sub-directory should be selected (see the + :doc:`Testing Guide ` for details). * Test cases should be written in `LLVM assembly language `_ unless the feature or regression being tested requires another language @@ -401,7 +399,7 @@ Hacker!" in the commit message. Overall, please do not add contributor names to the source code. -.. _copyright, license, and patent policies: +.. 
_copyright-license-patents: Copyright, License, and Patents =============================== diff --git a/docs/Dummy.html b/docs/Dummy.html new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/docs/ExceptionHandling.rst b/docs/ExceptionHandling.rst index 190f18261da0..0a86607556ab 100644 --- a/docs/ExceptionHandling.rst +++ b/docs/ExceptionHandling.rst @@ -1,5 +1,3 @@ -.. _exception_handling: - ========================== Exception Handling in LLVM ========================== @@ -34,13 +32,13 @@ execution of an application. A more complete description of the Itanium ABI exception handling runtime support of can be found at `Itanium C++ ABI: Exception Handling -`_. A description of the +`_. A description of the exception frame format can be found at `Exception Frames -`_, +`_, with details of the DWARF 4 specification at `DWARF 4 Standard `_. A description for the C++ exception table formats can be found at `Exception Handling Tables -`_. +`_. Setjmp/Longjmp Exception Handling --------------------------------- @@ -151,10 +149,10 @@ type info index are passed in as arguments. The landing pad saves the exception structure reference and then proceeds to select the catch block that corresponds to the type info of the exception object. -The LLVM `landingpad instruction `_ is used to convey -information about the landing pad to the back end. For C++, the ``landingpad`` -instruction returns a pointer and integer pair corresponding to the pointer to -the *exception structure* and the *selector value* respectively. +The LLVM :ref:`i_landingpad` is used to convey information about the landing +pad to the back end. For C++, the ``landingpad`` instruction returns a pointer +and integer pair corresponding to the pointer to the *exception structure* and +the *selector value* respectively. The ``landingpad`` instruction takes a reference to the personality function to be used for this ``try``/``catch`` sequence. 
The remainder of the instruction is @@ -203,10 +201,9 @@ A cleanup is extra code which needs to be run as part of unwinding a scope. C++ destructors are a typical example, but other languages and language extensions provide a variety of different kinds of cleanups. In general, a landing pad may need to run arbitrary amounts of cleanup code before actually entering a catch -block. To indicate the presence of cleanups, a `landingpad -instruction `_ should have a *cleanup* -clause. Otherwise, the unwinder will not stop at the landing pad if there are no -catches or filters that require it to. +block. To indicate the presence of cleanups, a :ref:`i_landingpad` should have +a *cleanup* clause. Otherwise, the unwinder will not stop at the landing pad if +there are no catches or filters that require it to. .. note:: @@ -226,9 +223,9 @@ Throw Filters C++ allows the specification of which exception types may be thrown from a function. To represent this, a top level landing pad may exist to filter out -invalid types. To express this in LLVM code the `landingpad -instruction `_ will have a filter clause. The clause -consists of an array of type infos. ``landingpad`` will return a negative value +invalid types. To express this in LLVM code the :ref:`i_landingpad` will have a +filter clause. The clause consists of an array of type infos. +``landingpad`` will return a negative value if the exception does not match any of the type infos. If no match is found then a call to ``__cxa_call_unexpected`` should be made, otherwise ``_Unwind_Resume``. Each of these functions requires a reference to the @@ -269,8 +266,8 @@ handling information at various points in generated code. .. _llvm.eh.typeid.for: -llvm.eh.typeid.for ------------------- +``llvm.eh.typeid.for`` +---------------------- .. code-block:: llvm @@ -283,8 +280,8 @@ function. This value can be used to compare against the result of .. 
_llvm.eh.sjlj.setjmp: -llvm.eh.sjlj.setjmp -------------------- +``llvm.eh.sjlj.setjmp`` +----------------------- .. code-block:: llvm @@ -305,8 +302,8 @@ available for use in a target-specific manner. .. _llvm.eh.sjlj.longjmp: -llvm.eh.sjlj.longjmp --------------------- +``llvm.eh.sjlj.longjmp`` +------------------------ .. code-block:: llvm @@ -318,8 +315,8 @@ a buffer populated by `llvm.eh.sjlj.setjmp`_. The frame pointer and stack pointer are restored from the buffer, then control is transferred to the destination address. -llvm.eh.sjlj.lsda ------------------ +``llvm.eh.sjlj.lsda`` +--------------------- .. code-block:: llvm @@ -330,8 +327,8 @@ the address of the Language Specific Data Area (LSDA) for the current function. The SJLJ front-end code stores this address in the exception handling function context for use by the runtime. -llvm.eh.sjlj.callsite ---------------------- +``llvm.eh.sjlj.callsite`` +------------------------- .. code-block:: llvm diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst index 6df08eee985a..3d8e9ee79a46 100644 --- a/docs/ExtendingLLVM.rst +++ b/docs/ExtendingLLVM.rst @@ -1,5 +1,3 @@ -.. _extending_llvm: - ============================================================ Extending LLVM: Adding instructions, intrinsics, types, etc. ============================================================ diff --git a/docs/FAQ.rst b/docs/FAQ.rst index b0e3ca045693..e4ab2c18f748 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -1,5 +1,3 @@ -.. _faq: - ================================ Frequently Asked Questions (FAQ) ================================ @@ -53,6 +51,29 @@ Some porting problems may exist in the following areas: like the Bourne Shell and sed. Porting to systems without these tools (MacOS 9, Plan 9) will require more effort. +What API do I use to store a value to one of the virtual registers in LLVM IR's SSA representation? 
+--------------------------------------------------------------------------------------------------- + +In short: you can't. It's actually kind of a silly question once you grok +what's going on. Basically, in code like: + +.. code-block:: llvm + + %result = add i32 %foo, %bar + +, ``%result`` is just a name given to the ``Value`` of the ``add`` +instruction. In other words, ``%result`` *is* the add instruction. The +"assignment" doesn't explicitly "store" anything to any "virtual register"; +the "``=``" is more like the mathematical sense of equality. + +Longer explanation: In order to generate a textual representation of the +IR, some kind of name has to be given to each instruction so that other +instructions can textually reference it. However, the isomorphic in-memory +representation that you manipulate from C++ has no such restriction since +instructions can simply keep pointers to any other ``Value``'s that they +reference. In fact, the names of dummy numbered temporaries like ``%1`` are +not explicitly represented in the in-memory representation at all (see +``Value::getName()``). Build Problems ============== @@ -79,7 +100,7 @@ grabbing the wrong linker/assembler/etc, there are two ways to fix it: #. Run ``configure`` with an alternative ``PATH`` that is correct. In a Bourne compatible shell, the syntax would be: -.. code-block:: bash +.. code-block:: console % PATH=[the path without the bad program] ./configure ... @@ -106,7 +127,7 @@ I've modified a Makefile in my source tree, but my build tree keeps using the ol If the Makefile already exists in your object tree, you can just run the following command in the top level directory of your object tree: -.. code-block:: bash +.. code-block:: console % ./config.status ; @@ -133,13 +154,13 @@ This is most likely occurring because you built a profile or release For example, if you built LLVM with the command: -.. code-block:: bash +.. 
code-block:: console % gmake ENABLE_PROFILING=1 ...then you must run the tests with the following commands: -.. code-block:: bash +.. code-block:: console % cd llvm/test % gmake ENABLE_PROFILING=1 @@ -175,17 +196,17 @@ After Subversion update, rebuilding gives the error "No rule to make target". ----------------------------------------------------------------------------- If the error is of the form: -.. code-block:: bash +.. code-block:: console gmake[2]: *** No rule to make target `/path/to/somefile', - needed by `/path/to/another/file.d'. + needed by `/path/to/another/file.d'. Stop. This may occur anytime files are moved within the Subversion repository or removed entirely. In this case, the best solution is to erase all ``.d`` files, which list dependencies for source files, and rebuild: -.. code-block:: bash +.. code-block:: console % cd $LLVM_OBJ_DIR % rm -f `find . -name \*\.d` diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html deleted file mode 100644 index 37800c8080d4..000000000000 --- a/docs/GCCFEBuildInstrs.html +++ /dev/null @@ -1,279 +0,0 @@ - - - - - - Building the LLVM GCC Front-End - - - -

- Building the LLVM GCC Front-End -

- -
    -
  1. Building llvm-gcc from Source
  2. -
  3. Building the Ada front-end
  4. -
  5. Building the Fortran front-end
  6. -
  7. License Information
  8. -
- -
-

Written by the LLVM Team

-
- - -

Building llvm-gcc from Source

- - -
- -

This section describes how to acquire and build llvm-gcc 4.2, which is based -on the GCC 4.2.1 front-end. Supported languages are Ada, C, C++, Fortran, -Objective-C and Objective-C++. Note that the instructions for building these -front-ends are completely different (and much easier!) than those for building -llvm-gcc3 in the past.

- -
    -
  1. Retrieve the appropriate llvm-gcc-4.2-version.source.tar.gz - archive from the LLVM web - site.

    - -

    It is also possible to download the sources of the llvm-gcc front end - from a read-only mirror using subversion. To check out the 4.2 code - for first time use:

    - -
    -
    -svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk dst-directory
    -
    -
    - -

    After that, the code can be updated in the destination directory - using:

    - -
    -
    svn update
    -
    - -

    The mirror is brought up to date every evening.

  2. - -
  3. Follow the directions in the top-level README.LLVM file for - up-to-date instructions on how to build llvm-gcc. See below for building - with support for Ada or Fortran. -
- -
- - -

Building the Ada front-end

- - -
-

Building with support for Ada amounts to following the directions in the -top-level README.LLVM file, adding ",ada" to EXTRALANGS, for example: -EXTRALANGS=,ada

- -

There are some complications however:

- -
    -
  1. The only platform for which the Ada front-end is known to build is - 32 bit intel x86 running linux. It is unlikely to build for other - systems without some work.

  2. -
  3. The build requires having a compiler that supports Ada, C and C++. - The Ada front-end is written in Ada so an Ada compiler is needed to - build it. Compilers known to work with the - LLVM 2.7 release - are gcc-4.2 and the - 2005, 2006 and 2007 versions of the - GNAT GPL Edition. - GNAT GPL 2008, gcc-4.3 and later will not work. - The LLVM parts of llvm-gcc are written in C++ so a C++ compiler is - needed to build them. The rest of gcc is written in C. - Some linux distributions provide a version of gcc that supports all - three languages (the Ada part often comes as an add-on package to - the rest of gcc). Otherwise it is possible to combine two versions - of gcc, one that supports Ada and C (such as the - 2007 GNAT GPL Edition) - and another which supports C++, see below.

  4. -
  5. Because the Ada front-end is experimental, it is wise to build the - compiler with checking enabled. This causes it to run much slower, but - helps catch mistakes in the compiler (please report any problems using - LLVM bugzilla).

  6. -
  7. The Ada front-end fails to - bootstrap, due to lack of LLVM support for - setjmp/longjmp style exception handling (used - internally by the compiler), so you must specify - --disable-bootstrap.

  8. -
- -

Supposing appropriate compilers are available, llvm-gcc with Ada support can - be built on an x86-32 linux box using the following recipe:

- -
    -
  1. Download the LLVM source - and unpack it:

    - -
    -wget http://llvm.org/releases/2.7/llvm-2.7.tgz
    -tar xzf llvm-2.7.tgz
    -mv llvm-2.7 llvm
    -
    - -

    or check out the - latest version from subversion:

    - -
    svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
    - -
  2. - -
  3. Download the - llvm-gcc-4.2 source - and unpack it:

    - -
    -wget http://llvm.org/releases/2.7/llvm-gcc-4.2-2.7.source.tgz
    -tar xzf llvm-gcc-4.2-2.7.source.tgz
    -mv llvm-gcc-4.2-2.7.source llvm-gcc-4.2
    -
    - -

    or check out the - latest version from subversion:

    - -
    -svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk llvm-gcc-4.2
    -
    -
  4. - -
  5. Make a build directory llvm-objects for llvm and make it the - current directory:

    - -
    -mkdir llvm-objects
    -cd llvm-objects
    -
    -
  6. - -
  7. Configure LLVM (here it is configured to install into /usr/local):

    - -
    -../llvm/configure --prefix=/usr/local --enable-optimized --enable-assertions
    -
    - -

    If you have a multi-compiler setup and the C++ compiler is not the - default, then you can configure like this:

    - -
    -CXX=PATH_TO_C++_COMPILER ../llvm/configure --prefix=/usr/local --enable-optimized --enable-assertions
    -
    - -

    To compile without checking (not recommended), replace - --enable-assertions with --disable-assertions.

    - -
  8. - -
  9. Build LLVM:

    - -
    -make
    -
    -
  10. - -
  11. Install LLVM (optional):

    - -
    -make install
    -
    -
  12. - -
  13. Make a build directory llvm-gcc-4.2-objects for llvm-gcc and make it the - current directory:

    - -
    -cd ..
    -mkdir llvm-gcc-4.2-objects
    -cd llvm-gcc-4.2-objects
    -
    -
  14. - -
  15. Configure llvm-gcc (here it is configured to install into /usr/local). - The --enable-checking flag turns on sanity checks inside the compiler. - To turn off these checks (not recommended), replace --enable-checking - with --disable-checking. - Additional languages can be appended to the --enable-languages switch, - for example --enable-languages=ada,c,c++.

    - -
    -../llvm-gcc-4.2/configure --prefix=/usr/local --enable-languages=ada,c \
    -                          --enable-checking --enable-llvm=$PWD/../llvm-objects \
    -			  --disable-bootstrap --disable-multilib
    -
    - -

    If you have a multi-compiler setup, then you can configure like this:

    - -
    -export CC=PATH_TO_C_AND_ADA_COMPILER
    -export CXX=PATH_TO_C++_COMPILER
    -../llvm-gcc-4.2/configure --prefix=/usr/local --enable-languages=ada,c \
    -                          --enable-checking --enable-llvm=$PWD/../llvm-objects \
    -			  --disable-bootstrap --disable-multilib
    -
    -
  16. - -
  17. Build and install the compiler:

    - -
    -make
    -make install
    -
    -
  18. -
- -
- - -

Building the Fortran front-end

- - -
-

To build with support for Fortran, follow the directions in the top-level -README.LLVM file, adding ",fortran" to EXTRALANGS, for example:

- -
-EXTRALANGS=,fortran
-
- -
- - -

License Information

- - -
-

-The LLVM GCC frontend is licensed to you under the GNU General Public License -and the GNU Lesser General Public License. Please see the files COPYING and -COPYING.LIB for more details. -

- -

-More information is available in the FAQ. -

-
- - - -
-
- Valid CSS - Valid HTML 4.01 - - LLVM Compiler Infrastructure
- Last modified: $Date: 2012-04-19 22:20:34 +0200 (Thu, 19 Apr 2012) $ -
- - - diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html deleted file mode 100644 index e12485167a72..000000000000 --- a/docs/GarbageCollection.html +++ /dev/null @@ -1,1389 +0,0 @@ - - - - - Accurate Garbage Collection with LLVM - - - - - -

- Accurate Garbage Collection with LLVM -

- -
    -
  1. Introduction - -
  2. - -
  3. Getting started - -
  4. - -
  5. Core support - -
  6. - -
  7. Compiler plugin interface - -
  8. - -
  9. Implementing a collector runtime - -
  10. - -
  11. References
  12. - -
- -
-

Written by Chris Lattner and - Gordon Henriksen

-
- - -

- Introduction -

- - -
- -

Garbage collection is a widely used technique that frees the programmer from -having to know the lifetimes of heap objects, making software easier to produce -and maintain. Many programming languages rely on garbage collection for -automatic memory management. There are two primary forms of garbage collection: -conservative and accurate.

- -

Conservative garbage collection often does not require any special support -from either the language or the compiler: it can handle non-type-safe -programming languages (such as C/C++) and does not require any special -information from the compiler. The -Boehm collector is -an example of a state-of-the-art conservative collector.

- -

Accurate garbage collection requires the ability to identify all pointers in -the program at run-time (which requires that the source-language be type-safe in -most cases). Identifying pointers at run-time requires compiler support to -locate all places that hold live pointer variables at run-time, including the -processor stack and registers.

- -

Conservative garbage collection is attractive because it does not require any -special compiler support, but it does have problems. In particular, because the -conservative garbage collector cannot know that a particular word in the -machine is a pointer, it cannot move live objects in the heap (preventing the -use of compacting and generational GC algorithms) and it can occasionally suffer -from memory leaks due to integer values that happen to point to objects in the -program. In addition, some aggressive compiler transformations can break -conservative garbage collectors (though these seem rare in practice).

- -

Accurate garbage collectors do not suffer from any of these problems, but -they can suffer from degraded scalar optimization of the program. In particular, -because the runtime must be able to identify and update all pointers active in -the program, some optimizations are less effective. In practice, however, the -locality and performance benefits of using aggressive garbage collection -techniques dominate any low-level losses.

- -

This document describes the mechanisms and interfaces provided by LLVM to -support accurate garbage collection.

- - -

- Goals and non-goals -

- -
- -

LLVM's intermediate representation provides garbage -collection intrinsics that offer support for a broad class of -collector models. For instance, the intrinsics permit:

- -
    -
  • semi-space collectors
  • -
  • mark-sweep collectors
  • -
  • generational collectors
  • -
  • reference counting
  • -
  • incremental collectors
  • -
  • concurrent collectors
  • -
  • cooperative collectors
  • -
- -

We hope that the primitive support built into the LLVM IR is sufficient to -support a broad class of garbage collected languages including Scheme, ML, Java, -C#, Perl, Python, Lua, Ruby, other scripting languages, and more.

- -

However, LLVM does not itself provide a garbage collector—this should -be part of your language's runtime library. LLVM provides a framework for -compile time code generation plugins. The role of these -plugins is to generate code and data structures which conform to the binary -interface specified by the runtime library. This is similar to the -relationship between LLVM and DWARF debugging info, for example. The -difference primarily lies in the lack of an established standard in the domain -of garbage collection—thus the plugins.

- -

The aspects of the binary interface with which LLVM's GC support is -concerned are:

- -
    -
  • Creation of GC-safe points within code where collection is allowed to - execute safely.
  • -
  • Computation of the stack map. For each safe point in the code, object - references within the stack frame must be identified so that the - collector may traverse and perhaps update them.
  • -
  • Write barriers when storing object references to the heap. These are - commonly used to optimize incremental scans in generational - collectors.
  • -
  • Emission of read barriers when loading object references. These are - useful for interoperating with concurrent collectors.
  • -
- -

There are additional areas that LLVM does not directly address:

- -
    -
  • Registration of global roots with the runtime.
  • -
  • Registration of stack map entries with the runtime.
  • -
  • The functions used by the program to allocate memory, trigger a - collection, etc.
  • -
  • Computation or compilation of type maps, or registration of them with - the runtime. These are used to crawl the heap for object - references.
  • -
- -

In general, LLVM's support for GC does not include features which can be -adequately addressed with other features of the IR and does not specify a -particular binary interface. On the plus side, this means that you should be -able to integrate LLVM with an existing runtime. On the other hand, it leaves -a lot of work for the developer of a novel language. However, it's easy to get -started quickly and scale up to a more sophisticated implementation as your -compiler matures.

- -
- -
- - -

- Getting started -

- - -
- -

Using a GC with LLVM implies many things, for example:

- -
    -
  • Write a runtime library or find an existing one which implements a GC - heap.
      -
    1. Implement a memory allocator.
    2. -
    3. Design a binary interface for the stack map, used to identify - references within a stack frame on the machine stack.*
    4. -
    5. Implement a stack crawler to discover functions on the call stack.*
    6. -
    7. Implement a registry for global roots.
    8. -
    9. Design a binary interface for type maps, used to identify references - within heap objects.
    10. -
    11. Implement a collection routine bringing together all of the above.
    12. -
  • -
  • Emit compatible code from your compiler.
      -
    • Initialization in the main function.
    • -
    • Use the gc "..." attribute to enable GC code generation - (or F.setGC("...")).
    • -
    • Use @llvm.gcroot to mark stack roots.
    • -
    • Use @llvm.gcread and/or @llvm.gcwrite to - manipulate GC references, if necessary.
    • -
    • Allocate memory using the GC allocation routine provided by the - runtime library.
    • -
    • Generate type maps according to your runtime's binary interface.
    • -
  • -
  • Write a compiler plugin to interface LLVM with the runtime library.*
      -
    • Lower @llvm.gcread and @llvm.gcwrite to appropriate - code sequences.*
    • -
    • Compile LLVM's stack map to the binary form expected by the - runtime.
    • -
  • -
  • Load the plugin into the compiler. Use llc -load or link the - plugin statically with your language's compiler.*
  • -
  • Link program executables with the runtime.
  • -
- -

To help with several of these tasks (those indicated with a *), LLVM -includes a highly portable, built-in ShadowStack code generator. It is compiled -into llc and works even with the interpreter and C backends.

- - -

- In your compiler -

- -
- -

To turn the shadow stack on for your functions, first call:

- -
F.setGC("shadow-stack");
- -

for each function your compiler emits. Since the shadow stack is built into -LLVM, you do not need to load a plugin.

- -

Your compiler must also use @llvm.gcroot as documented. -Don't forget to create a root for each intermediate value that is generated -when evaluating an expression. In h(f(), g()), the result of -f() could easily be collected if evaluating g() triggers a -collection.

- -

There's no need to use @llvm.gcread and @llvm.gcwrite over -plain load and store for now. You will need them when -switching to a more advanced GC.

- -
- - -

- In your runtime -

- -
- -

The shadow stack doesn't imply a memory allocation algorithm. A semispace -collector or building atop malloc are great places to start, and can -be implemented with very little code.

- -

When it comes time to collect, however, your runtime needs to traverse the -stack roots, and for this it needs to integrate with the shadow stack. Luckily, -doing so is very simple. (This code is heavily commented to help you -understand the data structure, but there are only 20 lines of meaningful -code.)

- -
-/// @brief The map for a single function's stack frame. One of these is
-///        compiled as constant data into the executable for each function.
-/// 
-/// Storage of metadata values is elided if the %metadata parameter to
-/// @llvm.gcroot is null.
-struct FrameMap {
-  int32_t NumRoots;    //< Number of roots in stack frame.
-  int32_t NumMeta;     //< Number of metadata entries. May be < NumRoots.
-  const void *Meta[0]; //< Metadata for each root.
-};
-
-/// @brief A link in the dynamic shadow stack. One of these is embedded in the
-///        stack frame of each function on the call stack.
-struct StackEntry {
-  StackEntry *Next;    //< Link to next stack entry (the caller's).
-  const FrameMap *Map; //< Pointer to constant FrameMap.
-  void *Roots[0];      //< Stack roots (in-place array).
-};
-
-/// @brief The head of the singly-linked list of StackEntries. Functions push
-///        and pop onto this in their prologue and epilogue.
-/// 
-/// Since there is only a global list, this technique is not threadsafe.
-StackEntry *llvm_gc_root_chain;
-
-/// @brief Calls Visitor(root, meta) for each GC root on the stack.
-///        root and meta are exactly the values passed to
-///        @llvm.gcroot.
-/// 
-/// Visitor could be a function to recursively mark live objects. Or it
-/// might copy them to another heap or generation.
-/// 
-/// @param Visitor A function to invoke for every GC root on the stack.
-void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
-  for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
-    unsigned i = 0;
-    
-    // For roots [0, NumMeta), the metadata pointer is in the FrameMap.
-    for (unsigned e = R->Map->NumMeta; i != e; ++i)
-      Visitor(&R->Roots[i], R->Map->Meta[i]);
-    
-    // For roots [NumMeta, NumRoots), the metadata pointer is null.
-    for (unsigned e = R->Map->NumRoots; i != e; ++i)
-      Visitor(&R->Roots[i], NULL);
-  }
-}
- -
- - -

- About the shadow stack -

- -
- -

Unlike many GC algorithms which rely on a cooperative code generator to -compile stack maps, this algorithm carefully maintains a linked list of stack -roots [Henderson2002]. This so-called "shadow stack" -mirrors the machine stack. Maintaining this data structure is slower than using -a stack map compiled into the executable as constant data, but has a significant -portability advantage because it requires no special support from the target -code generator, and does not require tricky platform-specific code to crawl -the machine stack.

- -

The tradeoff for this simplicity and portability is:

- -
    -
  • High overhead per function call.
  • -
  • Not thread-safe.
  • -
- -

Still, it's an easy way to get started. After your compiler and runtime are -up and running, writing a plugin will allow you to take -advantage of more advanced GC features of LLVM -in order to improve performance.

- -
- -
- - -

- IR features -

- - -
- -

This section describes the garbage collection facilities provided by the -LLVM intermediate representation. The exact behavior -of these IR features is specified by the binary interface implemented by a -code generation plugin, not by this document.

- -

These facilities are limited to those strictly necessary; they are not -intended to be a complete interface to any garbage collector. A program will -need to interface with the GC library using the facilities provided by that -program.

- - -

- Specifying GC code generation: gc "..." -

- -
- -
- define ty @name(...) gc "name" { ... -
- -

The gc function attribute is used to specify the desired GC style -to the compiler. Its programmatic equivalent is the setGC method of -Function.

- -

Setting gc "name" on a function triggers a search for a -matching code generation plugin "name"; it is that plugin which defines -the exact nature of the code generated to support GC. If none is found, the -compiler will raise an error.

- -

Specifying the GC style on a per-function basis allows LLVM to link together -programs that use different garbage collection algorithms (or none at all).

- -
- - -

- Identifying GC roots on the stack: llvm.gcroot -

- -
- -
- void @llvm.gcroot(i8** %ptrloc, i8* %metadata) -
- -

The llvm.gcroot intrinsic is used to inform LLVM that a stack -variable references an object on the heap and is to be tracked for garbage -collection. The exact impact on generated code is specified by a compiler plugin. All calls to llvm.gcroot must reside - inside the first basic block.

- -

A compiler which uses mem2reg to raise imperative code using alloca -into SSA form need only add a call to @llvm.gcroot for those variables -which are pointers into the GC heap.

- -

It is also important to mark intermediate values with llvm.gcroot. -For example, consider h(f(), g()). Beware leaking the result of -f() in the case that g() triggers a collection. Note that -stack variables must be initialized and marked with llvm.gcroot in -the function's prologue.

- -

The first argument must be a value referring to an alloca instruction -or a bitcast of an alloca. The second contains a pointer to metadata that -should be associated with the pointer, and must be a constant or global -value address. If your target collector uses tags, use a null pointer for -metadata.

- -

The %metadata argument can be used to avoid requiring heap objects -to have 'isa' pointers or tag bits. [Appel89, Goldberg91, Tolmach94] If -specified, its value will be tracked along with the location of the pointer in -the stack frame.

- -

Consider the following fragment of Java code:

- -
-       {
-         Object X;   // A null-initialized reference to an object
-         ...
-       }
-
- -

This block (which may be located in the middle of a function or in a loop -nest), could be compiled to this LLVM code:

- -
-Entry:
-   ;; In the entry block for the function, allocate the
-   ;; stack space for X, which is an LLVM pointer.
-   %X = alloca %Object*
-   
-   ;; Tell LLVM that the stack space is a stack root.
-   ;; Java has type-tags on objects, so we pass null as metadata.
-   %tmp = bitcast %Object** %X to i8**
-   call void @llvm.gcroot(i8** %tmp, i8* null)
-   ...
-
-   ;; "CodeBlock" is the block corresponding to the start
-   ;;  of the scope above.
-CodeBlock:
-   ;; Java null-initializes pointers.
-   store %Object* null, %Object** %X
-
-   ...
-
-   ;; As the pointer goes out of scope, store a null value into
-   ;; it, to indicate that the value is no longer live.
-   store %Object* null, %Object** %X
-   ...
-
- -
- - -

- Reading and writing references in the heap -

- -
- -

Some collectors need to be informed when the mutator (the program that needs -garbage collection) either reads a pointer from or writes a pointer to a field -of a heap object. The code fragments inserted at these points are called -read barriers and write barriers, respectively. The amount of -code that needs to be executed is usually quite small and not on the critical -path of any computation, so the overall performance impact of the barrier is -tolerable.

- -

Barriers often require access to the object pointer rather than the -derived pointer (which is a pointer to the field within the -object). Accordingly, these intrinsics take both pointers as separate arguments -for completeness. In this snippet, %object is the object pointer, and -%derived is the derived pointer:

- -
-    ;; An array type.
-    %class.Array = type { %class.Object, i32, [0 x %class.Object*] }
-    ...
-
-    ;; Load the object pointer from a gcroot.
-    %object = load %class.Array** %object_addr
-
-    ;; Compute the derived pointer.
-    %derived = getelementptr %object, i32 0, i32 2, i32 %n
- -

LLVM does not enforce this relationship between the object and derived -pointer (although a plugin might). However, it would be -an unusual collector that violated it.

- -

The use of these intrinsics is naturally optional if the target GC does -not require the corresponding barrier. Such a GC plugin will replace the intrinsic -calls with the corresponding load or store instruction if they -are used.

- - -

- Write barrier: llvm.gcwrite -

- -
- -
-void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived) -
- -

For write barriers, LLVM provides the llvm.gcwrite intrinsic -function. It has exactly the same semantics as a non-volatile store to -the derived pointer (the third argument). The exact code generated is specified -by a compiler plugin.

- -

Many important algorithms require write barriers, including generational -and concurrent collectors. Additionally, write barriers could be used to -implement reference counting.

- -
- - -

- Read barrier: llvm.gcread -

- -
- -
-i8* @llvm.gcread(i8* %object, i8** %derived)
-
- -

For read barriers, LLVM provides the llvm.gcread intrinsic function. -It has exactly the same semantics as a non-volatile load from the -derived pointer (the second argument). The exact code generated is specified by -a compiler plugin.

- -

Read barriers are needed by fewer algorithms than write barriers, and may -have a greater performance impact since pointer reads are more frequent than -writes.

- -
- -
- -
- - -

- Implementing a collector plugin -

- - -
- -

User code specifies which GC code generation to use with the gc -function attribute or, equivalently, with the setGC method of -Function.

- -

To implement a GC plugin, it is necessary to subclass -llvm::GCStrategy, which can be accomplished in a few lines of -boilerplate code. LLVM's infrastructure provides access to several important -algorithms. For an uncontroversial collector, all that remains may be to -compile LLVM's computed stack map to assembly code (using the binary -representation expected by the runtime library). This can be accomplished in -about 100 lines of code.

- -

This is not the appropriate place to implement a garbage collected heap or a -garbage collector itself. That code should exist in the language's runtime -library. The compiler plugin is responsible for generating code which -conforms to the binary interface defined by the library, most essentially the -stack map.

- -

To subclass llvm::GCStrategy and register it with the compiler:

- -
// lib/MyGC/MyGC.cpp - Example LLVM GC plugin
-
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/Support/Compiler.h"
-
-using namespace llvm;
-
-namespace {
-  class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
-  public:
-    MyGC() {}
-  };
-  
-  GCRegistry::Add<MyGC>
-  X("mygc", "My bespoke garbage collector.");
-}
- -

This boilerplate collector does nothing. More specifically:

- -
    -
  • llvm.gcread calls are replaced with the corresponding - load instruction.
  • -
  • llvm.gcwrite calls are replaced with the corresponding - store instruction.
  • -
  • No safe points are added to the code.
  • -
  • The stack map is not compiled into the executable.
  • -
- -

Using the LLVM makefiles (like the sample -project), this code can be compiled as a plugin using a simple -makefile:

- -
# lib/MyGC/Makefile
-
-LEVEL := ../..
-LIBRARYNAME = MyGC
-LOADABLE_MODULE = 1
-
-include $(LEVEL)/Makefile.common
- -

Once the plugin is compiled, code using it may be compiled using llc --load=MyGC.so (though MyGC.so may have some other -platform-specific extension):

- -
$ cat sample.ll
-define void @f() gc "mygc" {
-entry:
-        ret void
-}
-$ llvm-as < sample.ll | llc -load=MyGC.so
- -

It is also possible to statically link the collector plugin into tools, such -as a language-specific compiler front-end.

- - -

- Overview of available features -

- -
- -

GCStrategy provides a range of features through which a plugin -may do useful work. Some of these are callbacks, some are algorithms that can -be enabled, disabled, or customized. This matrix summarizes the supported (and -planned) features and correlates them with the collection techniques which -typically require them.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AlgorithmDoneshadow stackrefcountmark-sweepcopyingincrementalthreadedconcurrent
stack map
initialize roots
derived pointersNO✘*✘*
custom lowering
gcroot
gcwrite
gcread
safe points
in calls
before calls
for loopsNO
before escape
emit code at safe pointsNO
output
assembly
JITNO
objNO
live analysisNO
register mapNO
-
* Derived pointers only pose a - hazard to copying collectors.
-
in gray denotes a feature which - could be utilized if available.
-
- -

To be clear, the collection techniques above are defined as:

- -
-
Shadow Stack
-
The mutator carefully maintains a linked list of stack roots.
-
Reference Counting
-
The mutator maintains a reference count for each object and frees an - object when its count falls to zero.
-
Mark-Sweep
-
When the heap is exhausted, the collector marks reachable objects starting - from the roots, then deallocates unreachable objects in a sweep - phase.
-
Copying
-
As reachability analysis proceeds, the collector copies objects from one - heap area to another, compacting them in the process. Copying collectors - enable highly efficient "bump pointer" allocation and can improve locality - of reference.
-
Incremental
-
(Including generational collectors.) Incremental collectors generally have - all the properties of a copying collector (regardless of whether the - mature heap is compacting), but bring the added complexity of requiring - write barriers.
-
Threaded
-
Denotes a multithreaded mutator; the collector must still stop the mutator - ("stop the world") before beginning reachability analysis. Stopping a - multithreaded mutator is a complicated problem. It generally requires - highly platform specific code in the runtime, and the production of - carefully designed machine code at safe points.
-
Concurrent
-
In this technique, the mutator and the collector run concurrently, with - the goal of eliminating pause times. In a cooperative collector, - the mutator further aids with collection should a pause occur, allowing - collection to take advantage of multiprocessor hosts. The "stop the world" - problem of threaded collectors is generally still present to a limited - extent. Sophisticated marking algorithms are necessary. Read barriers may - be necessary.
-
- -

As the matrix indicates, LLVM's garbage collection infrastructure is already -suitable for a wide variety of collectors, but does not currently extend to -multithreaded programs. This will be added in the future as there is -interest.

- -
- - -

- Computing stack maps -

- -
- -

LLVM automatically computes a stack map. One of the most important features -of a GCStrategy is to compile this information into the executable in -the binary representation expected by the runtime library.

- -

The stack map consists of the location and identity of each GC root in -each function in the module. For each root:

- -
    -
  • RootNum: The index of the root.
  • -
  • StackOffset: The offset of the object relative to the frame - pointer.
  • -
  • RootMetadata: The value passed as the %metadata - parameter to the @llvm.gcroot intrinsic.
  • -
- -

Also, for the function as a whole:

- -
    -
  • getFrameSize(): The overall size of the function's initial - stack frame, not accounting for any dynamic allocation.
  • -
  • roots_size(): The count of roots in the function.
  • -
- -

To access the stack map, use GCFunctionInfo::roots_begin() and -roots_end() from the GCMetadataPrinter:

- -
for (iterator I = begin(), E = end(); I != E; ++I) {
-  GCFunctionInfo *FI = *I;
-  unsigned FrameSize = FI->getFrameSize();
-  size_t RootCount = FI->roots_size();
-
-  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
-                                      RE = FI->roots_end();
-                                      RI != RE; ++RI) {
-    int RootNum = RI->Num;
-    int RootStackOffset = RI->StackOffset;
-    Constant *RootMetadata = RI->Metadata;
-  }
-}
- -

If the llvm.gcroot intrinsic is eliminated before code generation by -a custom lowering pass, LLVM will compute an empty stack map. This may be useful -for collector plugins which implement reference counting or a shadow stack.

- -
- - - -

- Initializing roots to null: InitRoots -

- -
- -
MyGC::MyGC() {
-  InitRoots = true;
-}
- -

When set, LLVM will automatically initialize each root to null upon -entry to the function. This prevents the GC's sweep phase from visiting -uninitialized pointers, which will almost certainly cause it to crash. This -initialization occurs before custom lowering, so the two may be used -together.

- -

Since LLVM does not yet compute liveness information, there is no means of -distinguishing an uninitialized stack root from an initialized one. Therefore, -this feature should be used by all GC plugins. It is enabled by default.

- -
- - - -

- Custom lowering of intrinsics: CustomRoots, - CustomReadBarriers, and CustomWriteBarriers -

- -
- -

For GCs which use barriers or unusual treatment of stack roots, these -flags allow the collector to perform arbitrary transformations of the LLVM -IR:

- -
class MyGC : public GCStrategy {
-public:
-  MyGC() {
-    CustomRoots = true;
-    CustomReadBarriers = true;
-    CustomWriteBarriers = true;
-  }
-  
-  virtual bool initializeCustomLowering(Module &M);
-  virtual bool performCustomLowering(Function &F);
-};
- -

If any of these flags are set, then LLVM suppresses its default lowering for -the corresponding intrinsics and instead calls -performCustomLowering.

- -

LLVM's default action for each intrinsic is as follows:

- -
    -
  • llvm.gcroot: Leave it alone. The code generator must see it - or the stack map will not be computed.
  • -
  • llvm.gcread: Substitute a load instruction.
  • -
  • llvm.gcwrite: Substitute a store instruction.
  • -
- -

If CustomReadBarriers or CustomWriteBarriers are specified, -then performCustomLowering must eliminate the -corresponding barriers.

- -

performCustomLowering must comply with the same restrictions as FunctionPass::runOnFunction. -Likewise, initializeCustomLowering has the same semantics as Pass::doInitialization(Module&).

- -

The following can be used as a template:

- -
#include "llvm/Module.h"
-#include "llvm/IntrinsicInst.h"
-
-bool MyGC::initializeCustomLowering(Module &M) {
-  return false;
-}
-
-bool MyGC::performCustomLowering(Function &F) {
-  bool MadeChange = false;
-  
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; )
-      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
-        if (Function *F = CI->getCalledFunction())
-          switch (F->getIntrinsicID()) {
-          case Intrinsic::gcwrite:
-            // Handle llvm.gcwrite.
-            CI->eraseFromParent();
-            MadeChange = true;
-            break;
-          case Intrinsic::gcread:
-            // Handle llvm.gcread.
-            CI->eraseFromParent();
-            MadeChange = true;
-            break;
-          case Intrinsic::gcroot:
-            // Handle llvm.gcroot.
-            CI->eraseFromParent();
-            MadeChange = true;
-            break;
-          }
-  
-  return MadeChange;
-}
- -
- - - -

- Generating safe points: NeededSafePoints -

- -
- -

LLVM can compute four kinds of safe points:

- -
namespace GC {
-  /// PointKind - The type of a collector-safe point.
-  /// 
-  enum PointKind {
-    Loop,    //< Instr is a loop (backwards branch).
-    Return,  //< Instr is a return instruction.
-    PreCall, //< Instr is a call instruction.
-    PostCall //< Instr is the return address of a call.
-  };
-}
- -

A collector can request any combination of the four by setting the -NeededSafePoints mask:

- -
MyGC::MyGC() {
-  NeededSafePoints = 1 << GC::Loop
-                   | 1 << GC::Return
-                   | 1 << GC::PreCall
-                   | 1 << GC::PostCall;
-}
- -

It can then use the following routines to access safe points.

- -
for (iterator I = begin(), E = end(); I != E; ++I) {
-  GCFunctionInfo *MD = *I;
-  size_t PointCount = MD->size();
-
-  for (GCFunctionInfo::iterator PI = MD->begin(),
-                                PE = MD->end(); PI != PE; ++PI) {
-    GC::PointKind PointKind = PI->Kind;
-    unsigned PointNum = PI->Num;
-  }
-}
-
- -

Almost every collector requires PostCall safe points, since these -correspond to the moments when the function is suspended during a call to a -subroutine.

- -

Threaded programs generally require Loop safe points to guarantee -that the application will reach a safe point within a bounded amount of time, -even if it is executing a long-running loop which contains no function -calls.

- -

Threaded collectors may also require Return and PreCall -safe points to implement "stop the world" techniques using self-modifying code, -where it is important that the program not exit the function without reaching a -safe point (because only the topmost function has been patched).

- -
- - - -

- Emitting assembly code: GCMetadataPrinter -

- -
- -

LLVM allows a plugin to print arbitrary assembly code before and after the -rest of a module's assembly code. At the end of the module, the GC can compile -the LLVM stack map into assembly code. (At the beginning, this information is not -yet computed.)

- -

Since AsmWriter and CodeGen are separate components of LLVM, a separate -abstract base class and registry are provided for printing assembly code, the -GCMetadataPrinter and GCMetadataPrinterRegistry. The AsmWriter -will look for such a subclass if the GCStrategy sets -UsesMetadata:

- -
MyGC::MyGC() {
-  UsesMetadata = true;
-}
- -

This separation allows JIT-only clients to be smaller.

- -

Note that LLVM does not currently have analogous APIs to support code -generation in the JIT, nor using the object writers.

- -
// lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
-
-#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/Support/Compiler.h"
-
-using namespace llvm;
-
-namespace {
-  class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
-  public:
-    virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP,
-                               const TargetAsmInfo &TAI);
-  
-    virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP,
-                                const TargetAsmInfo &TAI);
-  };
-  
-  GCMetadataPrinterRegistry::Add<MyGCPrinter>
-  X("mygc", "My bespoke garbage collector.");
-}
- -

The collector should use AsmPrinter and TargetAsmInfo to -print portable assembly code to the std::ostream. The collector itself -contains the stack map for the entire module, and may access the -GCFunctionInfo using its own begin() and end() -methods. Here's a realistic example:

- -
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Function.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetAsmInfo.h"
-
-void MyGCPrinter::beginAssembly(std::ostream &OS, AsmPrinter &AP,
-                                const TargetAsmInfo &TAI) {
-  // Nothing to do.
-}
-
-void MyGCPrinter::finishAssembly(std::ostream &OS, AsmPrinter &AP,
-                                 const TargetAsmInfo &TAI) {
-  // Set up for emitting addresses.
-  const char *AddressDirective;
-  int AddressAlignLog;
-  if (AP.TM.getDataLayout()->getPointerSize() == sizeof(int32_t)) {
-    AddressDirective = TAI.getData32bitsDirective();
-    AddressAlignLog = 2;
-  } else {
-    AddressDirective = TAI.getData64bitsDirective();
-    AddressAlignLog = 3;
-  }
-  
-  // Put this in the data section.
-  AP.SwitchToDataSection(TAI.getDataSection());
-  
-  // For each function...
-  for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
-    GCFunctionInfo &MD = **FI;
-    
-    // Emit this data structure:
-    // 
-    // struct {
-    //   int32_t PointCount;
-    //   struct {
-    //     void *SafePointAddress;
-    //     int32_t LiveCount;
-    //     int32_t LiveOffsets[LiveCount];
-    //   } Points[PointCount];
-    // } __gcmap_<FUNCTIONNAME>;
-    
-    // Align to address width.
-    AP.EmitAlignment(AddressAlignLog);
-    
-    // Emit the symbol by which the stack map entry can be found.
-    std::string Symbol;
-    Symbol += TAI.getGlobalPrefix();
-    Symbol += "__gcmap_";
-    Symbol += MD.getFunction().getName();
-    if (const char *GlobalDirective = TAI.getGlobalDirective())
-      OS << GlobalDirective << Symbol << "\n";
-    OS << TAI.getGlobalPrefix() << Symbol << ":\n";
-    
-    // Emit PointCount.
-    AP.EmitInt32(MD.size());
-    AP.EOL("safe point count");
-    
-    // And each safe point...
-    for (GCFunctionInfo::iterator PI = MD.begin(),
-                                     PE = MD.end(); PI != PE; ++PI) {
-      // Align to address width.
-      AP.EmitAlignment(AddressAlignLog);
-      
-      // Emit the address of the safe point.
-      OS << AddressDirective
-         << TAI.getPrivateGlobalPrefix() << "label" << PI->Num;
-      AP.EOL("safe point address");
-      
-      // Emit the stack frame size.
-      AP.EmitInt32(MD.getFrameSize());
-      AP.EOL("stack frame size");
-      
-      // Emit the number of live roots in the function.
-      AP.EmitInt32(MD.live_size(PI));
-      AP.EOL("live root count");
-      
-      // And for each live root...
-      for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
-                                            LE = MD.live_end(PI);
-                                            LI != LE; ++LI) {
-        // Print its offset within the stack frame.
-        AP.EmitInt32(LI->StackOffset);
-        AP.EOL("stack offset");
-      }
-    }
-  }
-}
-
- -
- -
- - -

- References -

- - -
- -

[Appel89] Runtime Tags Aren't Necessary. Andrew -W. Appel. Lisp and Symbolic Computation 19(7):703-705, July 1989.

- -

[Goldberg91] Tag-free garbage collection for -strongly typed programming languages. Benjamin Goldberg. ACM SIGPLAN -PLDI'91.

- -

[Tolmach94] Tag-free garbage collection using -explicit type parameters. Andrew Tolmach. Proceedings of the 1994 ACM -conference on LISP and functional programming.

- -

[Henderson2002] -Accurate Garbage Collection in an Uncooperative Environment. -Fergus Henderson. International Symposium on Memory Management 2002.

- -
- - - - -
-
- Valid CSS - Valid HTML 4.01 - - Chris Lattner
- LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - - diff --git a/docs/GarbageCollection.rst b/docs/GarbageCollection.rst new file mode 100644 index 000000000000..5c3a1af23cd3 --- /dev/null +++ b/docs/GarbageCollection.rst @@ -0,0 +1,1029 @@ +===================================== +Accurate Garbage Collection with LLVM +===================================== + +.. contents:: + :local: + +Introduction +============ + +Garbage collection is a widely used technique that frees the programmer from +having to know the lifetimes of heap objects, making software easier to produce +and maintain. Many programming languages rely on garbage collection for +automatic memory management. There are two primary forms of garbage collection: +conservative and accurate. + +Conservative garbage collection often does not require any special support from +either the language or the compiler: it can handle non-type-safe programming +languages (such as C/C++) and does not require any special information from the +compiler. The `Boehm collector +`__ is an example of a +state-of-the-art conservative collector. + +Accurate garbage collection requires the ability to identify all pointers in the +program at run-time (which requires that the source-language be type-safe in +most cases). Identifying pointers at run-time requires compiler support to +locate all places that hold live pointer variables at run-time, including the +:ref:`processor stack and registers `. + +Conservative garbage collection is attractive because it does not require any +special compiler support, but it does have problems. In particular, because the +conservative garbage collector cannot *know* that a particular word in the +machine is a pointer, it cannot move live objects in the heap (preventing the +use of compacting and generational GC algorithms) and it can occasionally suffer +from memory leaks due to integer values that happen to point to objects in the +program. 
In addition, some aggressive compiler transformations can break +conservative garbage collectors (though these seem rare in practice). + +Accurate garbage collectors do not suffer from any of these problems, but they +can suffer from degraded scalar optimization of the program. In particular, +because the runtime must be able to identify and update all pointers active in +the program, some optimizations are less effective. In practice, however, the +locality and performance benefits of using aggressive garbage collection +techniques dominate any low-level losses. + +This document describes the mechanisms and interfaces provided by LLVM to +support accurate garbage collection. + +Goals and non-goals +------------------- + +LLVM's intermediate representation provides :ref:`garbage collection intrinsics +<gc_intrinsics>` that offer support for a broad class of collector models. For +instance, the intrinsics permit: + +* semi-space collectors + +* mark-sweep collectors + +* generational collectors + +* reference counting + +* incremental collectors + +* concurrent collectors + +* cooperative collectors + +We hope that the primitive support built into the LLVM IR is sufficient to +support a broad class of garbage collected languages including Scheme, ML, Java, +C#, Perl, Python, Lua, Ruby, other scripting languages, and more. + +However, LLVM does not itself provide a garbage collector --- this should be +part of your language's runtime library. LLVM provides a framework for compile +time :ref:`code generation plugins `. The role of these plugins is to +generate code and data structures which conform to the *binary interface* +specified by the *runtime library*. This is similar to the relationship between +LLVM and DWARF debugging info, for example. The difference primarily lies in +the lack of an established standard in the domain of garbage collection --- thus +the plugins.
+ +The aspects of the binary interface with which LLVM's GC support is +concerned are: + +* Creation of GC-safe points within code where collection is allowed to execute + safely. + +* Computation of the stack map. For each safe point in the code, object + references within the stack frame must be identified so that the collector may + traverse and perhaps update them. + +* Write barriers when storing object references to the heap. These are commonly + used to optimize incremental scans in generational collectors. + +* Emission of read barriers when loading object references. These are useful + for interoperating with concurrent collectors. + +There are additional areas that LLVM does not directly address: + +* Registration of global roots with the runtime. + +* Registration of stack map entries with the runtime. + +* The functions used by the program to allocate memory, trigger a collection, + etc. + +* Computation or compilation of type maps, or registration of them with the + runtime. These are used to crawl the heap for object references. + +In general, LLVM's support for GC does not include features which can be +adequately addressed with other features of the IR and does not specify a +particular binary interface. On the plus side, this means that you should be +able to integrate LLVM with an existing runtime. On the other hand, it leaves a +lot of work for the developer of a novel language. However, it's easy to get +started quickly and scale up to a more sophisticated implementation as your +compiler matures. + +Getting started +=============== + +Using a GC with LLVM implies many things, for example: + +* Write a runtime library or find an existing one which implements a GC heap. + + #. Implement a memory allocator. + + #. Design a binary interface for the stack map, used to identify references + within a stack frame on the machine stack.\* + + #. Implement a stack crawler to discover functions on the call stack.\* + + #. 
Implement a registry for global roots. + + #. Design a binary interface for type maps, used to identify references + within heap objects. + + #. Implement a collection routine bringing together all of the above. + +* Emit compatible code from your compiler. + + * Initialization in the main function. + + * Use the ``gc "..."`` attribute to enable GC code generation (or + ``F.setGC("...")``). + + * Use ``@llvm.gcroot`` to mark stack roots. + + * Use ``@llvm.gcread`` and/or ``@llvm.gcwrite`` to manipulate GC references, + if necessary. + + * Allocate memory using the GC allocation routine provided by the runtime + library. + + * Generate type maps according to your runtime's binary interface. + +* Write a compiler plugin to interface LLVM with the runtime library.\* + + * Lower ``@llvm.gcread`` and ``@llvm.gcwrite`` to appropriate code + sequences.\* + + * Compile LLVM's stack map to the binary form expected by the runtime. + +* Load the plugin into the compiler. Use ``llc -load`` or link the plugin + statically with your language's compiler.\* + +* Link program executables with the runtime. + +To help with several of these tasks (those indicated with a \*), LLVM includes a +highly portable, built-in ShadowStack code generator. It is compiled into +``llc`` and works even with the interpreter and C backends. + +In your compiler +---------------- + +To turn the shadow stack on for your functions, first call: + +.. code-block:: c++ + + F.setGC("shadow-stack"); + +for each function your compiler emits. Since the shadow stack is built into +LLVM, you do not need to load a plugin. + +Your compiler must also use ``@llvm.gcroot`` as documented. Don't forget to +create a root for each intermediate value that is generated when evaluating an +expression. In ``h(f(), g())``, the result of ``f()`` could easily be collected +if evaluating ``g()`` triggers a collection. + +There's no need to use ``@llvm.gcread`` and ``@llvm.gcwrite`` over plain +``load`` and ``store`` for now. 
You will need them when switching to a more +advanced GC. + +In your runtime +--------------- + +The shadow stack doesn't imply a memory allocation algorithm. A semispace +collector or building atop ``malloc`` are great places to start, and can be +implemented with very little code. + +When it comes time to collect, however, your runtime needs to traverse the stack +roots, and for this it needs to integrate with the shadow stack. Luckily, doing +so is very simple. (This code is heavily commented to help you understand the +data structure, but there are only 20 lines of meaningful code.) + +.. code-block:: c++ + + /// @brief The map for a single function's stack frame. One of these is + /// compiled as constant data into the executable for each function. + /// + /// Storage of metadata values is elided if the %metadata parameter to + /// @llvm.gcroot is null. + struct FrameMap { + int32_t NumRoots; //< Number of roots in stack frame. + int32_t NumMeta; //< Number of metadata entries. May be < NumRoots. + const void *Meta[0]; //< Metadata for each root. + }; + + /// @brief A link in the dynamic shadow stack. One of these is embedded in + /// the stack frame of each function on the call stack. + struct StackEntry { + StackEntry *Next; //< Link to next stack entry (the caller's). + const FrameMap *Map; //< Pointer to constant FrameMap. + void *Roots[0]; //< Stack roots (in-place array). + }; + + /// @brief The head of the singly-linked list of StackEntries. Functions push + /// and pop onto this in their prologue and epilogue. + /// + /// Since there is only a global list, this technique is not threadsafe. + StackEntry *llvm_gc_root_chain; + + /// @brief Calls Visitor(root, meta) for each GC root on the stack. + /// root and meta are exactly the values passed to + /// @llvm.gcroot. + /// + /// Visitor could be a function to recursively mark live objects. Or it + /// might copy them to another heap or generation. 
+ /// + /// @param Visitor A function to invoke for every GC root on the stack. + void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) { + for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) { + unsigned i = 0; + + // For roots [0, NumMeta), the metadata pointer is in the FrameMap. + for (unsigned e = R->Map->NumMeta; i != e; ++i) + Visitor(&R->Roots[i], R->Map->Meta[i]); + + // For roots [NumMeta, NumRoots), the metadata pointer is null. + for (unsigned e = R->Map->NumRoots; i != e; ++i) + Visitor(&R->Roots[i], NULL); + } + } + +About the shadow stack +---------------------- + +Unlike many GC algorithms which rely on a cooperative code generator to compile +stack maps, this algorithm carefully maintains a linked list of stack roots +[:ref:`Henderson2002 `]. This so-called "shadow stack" mirrors the +machine stack. Maintaining this data structure is slower than using a stack map +compiled into the executable as constant data, but has a significant portability +advantage because it requires no special support from the target code generator, +and does not require tricky platform-specific code to crawl the machine stack. + +The tradeoff for this simplicity and portability is: + +* High overhead per function call. + +* Not thread-safe. + +Still, it's an easy way to get started. After your compiler and runtime are up +and running, writing a :ref:`plugin ` will allow you to take advantage +of :ref:`more advanced GC features ` of LLVM in order to +improve performance. + +.. _gc_intrinsics: + +IR features +=========== + +This section describes the garbage collection facilities provided by the +:doc:`LLVM intermediate representation `. The exact behavior of these +IR features is specified by the binary interface implemented by a :ref:`code +generation plugin `, not by this document. + +These facilities are limited to those strictly necessary; they are not intended +to be a complete interface to any garbage collector. 
A program will need to +interface with the GC library using the facilities provided by that program. + +Specifying GC code generation: ``gc "..."`` +------------------------------------------- + +.. code-block:: llvm + + define ty @name(...) gc "name" { ... + +The ``gc`` function attribute is used to specify the desired GC style to the +compiler. Its programmatic equivalent is the ``setGC`` method of ``Function``. + +Setting ``gc "name"`` on a function triggers a search for a matching code +generation plugin "*name*"; it is that plugin which defines the exact nature of +the code generated to support GC. If none is found, the compiler will raise an +error. + +Specifying the GC style on a per-function basis allows LLVM to link together +programs that use different garbage collection algorithms (or none at all). + +.. _gcroot: + +Identifying GC roots on the stack: ``llvm.gcroot`` +-------------------------------------------------- + +.. code-block:: llvm + + void @llvm.gcroot(i8** %ptrloc, i8* %metadata) + +The ``llvm.gcroot`` intrinsic is used to inform LLVM that a stack variable +references an object on the heap and is to be tracked for garbage collection. +The exact impact on generated code is specified by a :ref:`compiler plugin +<plugin>`. All calls to ``llvm.gcroot`` **must** reside inside the first basic +block. + +A compiler which uses mem2reg to raise imperative code using ``alloca`` into SSA +form need only add a call to ``@llvm.gcroot`` for those variables which are +pointers into the GC heap. + +It is also important to mark intermediate values with ``llvm.gcroot``. For +example, consider ``h(f(), g())``. Beware leaking the result of ``f()`` in the +case that ``g()`` triggers a collection. Note that stack variables must be +initialized and marked with ``llvm.gcroot`` in the function's prologue. + +The first argument **must** be a value referring to an alloca instruction or a +bitcast of an alloca.
The second contains a pointer to metadata that should be +associated with the pointer, and **must** be a constant or global value +address. If your target collector uses tags, use a null pointer for metadata. + +The ``%metadata`` argument can be used to avoid requiring heap objects to have +'isa' pointers or tag bits. [Appel89_, Goldberg91_, Tolmach94_] If specified, +its value will be tracked along with the location of the pointer in the stack +frame. + +Consider the following fragment of Java code: + +.. code-block:: java + + { + Object X; // A null-initialized reference to an object + ... + } + +This block (which may be located in the middle of a function or in a loop nest), +could be compiled to this LLVM code: + +.. code-block:: llvm + + Entry: + ;; In the entry block for the function, allocate the + ;; stack space for X, which is an LLVM pointer. + %X = alloca %Object* + + ;; Tell LLVM that the stack space is a stack root. + ;; Java has type-tags on objects, so we pass null as metadata. + %tmp = bitcast %Object** %X to i8** + call void @llvm.gcroot(i8** %tmp, i8* null) + ... + + ;; "CodeBlock" is the block corresponding to the start + ;; of the scope above. + CodeBlock: + ;; Java null-initializes pointers. + store %Object* null, %Object** %X + + ... + + ;; As the pointer goes out of scope, store a null value into + ;; it, to indicate that the value is no longer live. + store %Object* null, %Object** %X + ... + +Reading and writing references in the heap +------------------------------------------ + +Some collectors need to be informed when the mutator (the program that needs +garbage collection) either reads a pointer from or writes a pointer to a field +of a heap object. The code fragments inserted at these points are called *read +barriers* and *write barriers*, respectively. The amount of code that needs to +be executed is usually quite small and not on the critical path of any +computation, so the overall performance impact of the barrier is tolerable. 
+ +Barriers often require access to the *object pointer* rather than the *derived +pointer* (which is a pointer to the field within the object). Accordingly, +these intrinsics take both pointers as separate arguments for completeness. In +this snippet, ``%object`` is the object pointer, and ``%derived`` is the derived +pointer: + +.. code-block:: llvm + + ;; An array type. + %class.Array = type { %class.Object, i32, [0 x %class.Object*] } + ... + + ;; Load the object pointer from a gcroot. + %object = load %class.Array** %object_addr + + ;; Compute the derived pointer. + %derived = getelementptr %object, i32 0, i32 2, i32 %n + +LLVM does not enforce this relationship between the object and derived pointer +(although a :ref:`plugin <plugin>` might). However, it would be an unusual +collector that violated it. + +The use of these intrinsics is naturally optional if the target GC does not require +the corresponding barrier. Such a GC plugin will replace the intrinsic calls +with the corresponding ``load`` or ``store`` instruction if they are used. + +Write barrier: ``llvm.gcwrite`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived) + +For write barriers, LLVM provides the ``llvm.gcwrite`` intrinsic function. It +has exactly the same semantics as a non-volatile ``store`` to the derived +pointer (the third argument). The exact code generated is specified by a +compiler :ref:`plugin <plugin>`. + +Many important algorithms require write barriers, including generational and +concurrent collectors. Additionally, write barriers could be used to implement +reference counting. + +Read barrier: ``llvm.gcread`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + i8* @llvm.gcread(i8* %object, i8** %derived) + +For read barriers, LLVM provides the ``llvm.gcread`` intrinsic function. It has +exactly the same semantics as a non-volatile ``load`` from the derived pointer +(the second argument).
The exact code generated is specified by a +:ref:`compiler plugin <plugin>`. + +Read barriers are needed by fewer algorithms than write barriers, and may have a +greater performance impact since pointer reads are more frequent than writes. + +.. _plugin: + +Implementing a collector plugin +=============================== + +User code specifies which GC code generation to use with the ``gc`` function +attribute or, equivalently, with the ``setGC`` method of ``Function``. + +To implement a GC plugin, it is necessary to subclass ``llvm::GCStrategy``, +which can be accomplished in a few lines of boilerplate code. LLVM's +infrastructure provides access to several important algorithms. For an +uncontroversial collector, all that remains may be to compile LLVM's computed +stack map to assembly code (using the binary representation expected by the +runtime library). This can be accomplished in about 100 lines of code. + +This is not the appropriate place to implement a garbage collected heap or a +garbage collector itself. That code should exist in the language's runtime +library. The compiler plugin is responsible for generating code which conforms +to the binary interface defined by the library, most essentially the :ref:`stack map +<stack-map>`. + +To subclass ``llvm::GCStrategy`` and register it with the compiler: + +.. code-block:: c++ + + // lib/MyGC/MyGC.cpp - Example LLVM GC plugin + + #include "llvm/CodeGen/GCStrategy.h" + #include "llvm/CodeGen/GCMetadata.h" + #include "llvm/Support/Compiler.h" + + using namespace llvm; + + namespace { + class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy { + public: + MyGC() {} + }; + + GCRegistry::Add<MyGC> + X("mygc", "My bespoke garbage collector."); + } + +This boilerplate collector does nothing. More specifically: + +* ``llvm.gcread`` calls are replaced with the corresponding ``load`` + instruction. + +* ``llvm.gcwrite`` calls are replaced with the corresponding ``store`` + instruction. + +* No safe points are added to the code.
+ +* The stack map is not compiled into the executable. + +Using the LLVM makefiles (like the `sample project +`__), this code +can be compiled as a plugin using a simple makefile: + +.. code-block:: make + + # lib/MyGC/Makefile + + LEVEL := ../.. + LIBRARYNAME = MyGC + LOADABLE_MODULE = 1 + + include $(LEVEL)/Makefile.common + +Once the plugin is compiled, code using it may be compiled using ``llc +-load=MyGC.so`` (though MyGC.so may have some other platform-specific +extension): + +:: + + $ cat sample.ll + define void @f() gc "mygc" { + entry: + ret void + } + $ llvm-as < sample.ll | llc -load=MyGC.so + +It is also possible to statically link the collector plugin into tools, such as +a language-specific compiler front-end. + +.. _collector-algos: + +Overview of available features +------------------------------ + +``GCStrategy`` provides a range of features through which a plugin may do useful +work. Some of these are callbacks, some are algorithms that can be enabled, +disabled, or customized. This matrix summarizes the supported (and planned) +features and correlates them with the collection techniques which typically +require them. + +.. |v| unicode:: 0x2714 + :trim: + +.. 
|x| unicode:: 0x2718 + :trim: + ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| Algorithm | Done | Shadow | refcount | mark- | copying | incremental | threaded | concurrent | +| | | stack | | sweep | | | | | ++============+======+========+==========+=======+=========+=============+==========+============+ +| stack map | |v| | | | |x| | |x| | |x| | |x| | |x| | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| initialize | |v| | |x| | |x| | |x| | |x| | |x| | |x| | |x| | +| roots | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| derived | NO | | | | | | **N**\* | **N**\* | +| pointers | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| **custom | |v| | | | | | | | | +| lowering** | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *gcroot* | |v| | |x| | |x| | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *gcwrite* | |v| | | |x| | | | |x| | | |x| | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *gcread* | |v| | | | | | | | |x| | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| **safe | | | | | | | | | +| points** | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *in | |v| | | | |x| | |x| | |x| | |x| | |x| | +| calls* | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *before | |v| | | | | | | |x| | |x| | +| calls* | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *for | NO | | | | | | 
**N** | **N** | +| loops* | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *before | |v| | | | | | | |x| | |x| | +| escape* | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| emit code | NO | | | | | | **N** | **N** | +| at safe | | | | | | | | | +| points | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| **output** | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *assembly* | |v| | | | |x| | |x| | |x| | |x| | |x| | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *JIT* | NO | | | **?** | **?** | **?** | **?** | **?** | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| *obj* | NO | | | **?** | **?** | **?** | **?** | **?** | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| live | NO | | | **?** | **?** | **?** | **?** | **?** | +| analysis | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| register | NO | | | **?** | **?** | **?** | **?** | **?** | +| map | | | | | | | | | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| \* Derived pointers only pose a hasard to copying collections. | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ +| **?** denotes a feature which could be utilized if available. | ++------------+------+--------+----------+-------+---------+-------------+----------+------------+ + +To be clear, the collection techniques above are defined as: + +Shadow Stack + The mutator carefully maintains a linked list of stack roots. 
+ +Reference Counting + The mutator maintains a reference count for each object and frees an object + when its count falls to zero. + +Mark-Sweep + When the heap is exhausted, the collector marks reachable objects starting + from the roots, then deallocates unreachable objects in a sweep phase. + +Copying + As reachability analysis proceeds, the collector copies objects from one heap + area to another, compacting them in the process. Copying collectors enable + highly efficient "bump pointer" allocation and can improve locality of + reference. + +Incremental + (Including generational collectors.) Incremental collectors generally have all + the properties of a copying collector (regardless of whether the mature heap + is compacting), but bring the added complexity of requiring write barriers. + +Threaded + Denotes a multithreaded mutator; the collector must still stop the mutator + ("stop the world") before beginning reachability analysis. Stopping a + multithreaded mutator is a complicated problem. It generally requires highly + platform specific code in the runtime, and the production of carefully + designed machine code at safe points. + +Concurrent + In this technique, the mutator and the collector run concurrently, with the + goal of eliminating pause times. In a *cooperative* collector, the mutator + further aids with collection should a pause occur, allowing collection to take + advantage of multiprocessor hosts. The "stop the world" problem of threaded + collectors is generally still present to a limited extent. Sophisticated + marking algorithms are necessary. Read barriers may be necessary. + +As the matrix indicates, LLVM's garbage collection infrastructure is already +suitable for a wide variety of collectors, but does not currently extend to +multithreaded programs. This will be added in the future as there is +interest. + +.. _stack-map: + +Computing stack maps +-------------------- + +LLVM automatically computes a stack map. 
One of the most important features +of a ``GCStrategy`` is to compile this information into the executable in +the binary representation expected by the runtime library. + +The stack map consists of the location and identity of each GC root in +each function in the module. For each root: + +* ``RootNum``: The index of the root. + +* ``StackOffset``: The offset of the object relative to the frame pointer. + +* ``RootMetadata``: The value passed as the ``%metadata`` parameter to the + ``@llvm.gcroot`` intrinsic. + +Also, for the function as a whole: + +* ``getFrameSize()``: The overall size of the function's initial stack frame, + not accounting for any dynamic allocation. + +* ``roots_size()``: The count of roots in the function. + +To access the stack map, use ``GCFunctionInfo::roots_begin()`` and +-``end()`` from the :ref:`GCMetadataPrinter <assembly>`: + +.. code-block:: c++ + + for (iterator I = begin(), E = end(); I != E; ++I) { + GCFunctionInfo *FI = *I; + unsigned FrameSize = FI->getFrameSize(); + size_t RootCount = FI->roots_size(); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), + RE = FI->roots_end(); + RI != RE; ++RI) { + int RootNum = RI->Num; + int RootStackOffset = RI->StackOffset; + Constant *RootMetadata = RI->Metadata; + } + } + +If the ``llvm.gcroot`` intrinsic is eliminated before code generation by a +custom lowering pass, LLVM will compute an empty stack map. This may be useful +for collector plugins which implement reference counting or a shadow stack. + +.. _init-roots: + +Initializing roots to null: ``InitRoots`` +----------------------------------------- + +.. code-block:: c++ + + MyGC::MyGC() { + InitRoots = true; + } + +When set, LLVM will automatically initialize each root to ``null`` upon entry to +the function. This prevents the GC's sweep phase from visiting uninitialized +pointers, which will almost certainly cause it to crash. This initialization +occurs before custom lowering, so the two may be used together.
+ +Since LLVM does not yet compute liveness information, there is no means of +distinguishing an uninitialized stack root from an initialized one. Therefore, +this feature should be used by all GC plugins. It is enabled by default. + +Custom lowering of intrinsics: ``CustomRoots``, ``CustomReadBarriers``, and ``CustomWriteBarriers`` +--------------------------------------------------------------------------------------------------- + +For GCs which use barriers or unusual treatment of stack roots, these flags +allow the collector to perform arbitrary transformations of the LLVM IR: + +.. code-block:: c++ + + class MyGC : public GCStrategy { + public: + MyGC() { + CustomRoots = true; + CustomReadBarriers = true; + CustomWriteBarriers = true; + } + + virtual bool initializeCustomLowering(Module &M); + virtual bool performCustomLowering(Function &F); + }; + +If any of these flags are set, then LLVM suppresses its default lowering for the +corresponding intrinsics and instead calls ``performCustomLowering``. + +LLVM's default action for each intrinsic is as follows: + +* ``llvm.gcroot``: Leave it alone. The code generator must see it or the stack + map will not be computed. + +* ``llvm.gcread``: Substitute a ``load`` instruction. + +* ``llvm.gcwrite``: Substitute a ``store`` instruction. + +If ``CustomReadBarriers`` or ``CustomWriteBarriers`` are specified, then +``performCustomLowering`` **must** eliminate the corresponding barriers. + +``performCustomLowering`` must comply with the same restrictions as +:ref:`FunctionPass::runOnFunction ` +Likewise, ``initializeCustomLowering`` has the same semantics as +:ref:`Pass::doInitialization(Module&) +` + +The following can be used as a template: + +.. 
code-block:: c++ + + #include "llvm/Module.h" + #include "llvm/IntrinsicInst.h" + + bool MyGC::initializeCustomLowering(Module &M) { + return false; + } + + bool MyGC::performCustomLowering(Function &F) { + bool MadeChange = false; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + switch (F->getIntrinsicID()) { + case Intrinsic::gcwrite: + // Handle llvm.gcwrite. + CI->eraseFromParent(); + MadeChange = true; + break; + case Intrinsic::gcread: + // Handle llvm.gcread. + CI->eraseFromParent(); + MadeChange = true; + break; + case Intrinsic::gcroot: + // Handle llvm.gcroot. + CI->eraseFromParent(); + MadeChange = true; + break; + } + + return MadeChange; + } + +.. _safe-points: + +Generating safe points: ``NeededSafePoints`` +-------------------------------------------- + +LLVM can compute four kinds of safe points: + +.. code-block:: c++ + + namespace GC { + /// PointKind - The type of a collector-safe point. + /// + enum PointKind { + Loop, //< Instr is a loop (backwards branch). + Return, //< Instr is a return instruction. + PreCall, //< Instr is a call instruction. + PostCall //< Instr is the return address of a call. + }; + } + +A collector can request any combination of the four by setting the +``NeededSafePoints`` mask: + +.. code-block:: c++ + + MyGC::MyGC() { + NeededSafePoints = 1 << GC::Loop + | 1 << GC::Return + | 1 << GC::PreCall + | 1 << GC::PostCall; + } + +It can then use the following routines to access safe points. + +..
code-block:: c++ + + for (iterator I = begin(), E = end(); I != E; ++I) { + GCFunctionInfo *MD = *I; + size_t PointCount = MD->size(); + + for (GCFunctionInfo::iterator PI = MD->begin(), + PE = MD->end(); PI != PE; ++PI) { + GC::PointKind PointKind = PI->Kind; + unsigned PointNum = PI->Num; + } + } + +Almost every collector requires ``PostCall`` safe points, since these correspond +to the moments when the function is suspended during a call to a subroutine. + +Threaded programs generally require ``Loop`` safe points to guarantee that the +application will reach a safe point within a bounded amount of time, even if it +is executing a long-running loop which contains no function calls. + +Threaded collectors may also require ``Return`` and ``PreCall`` safe points to +implement "stop the world" techniques using self-modifying code, where it is +important that the program not exit the function without reaching a safe point +(because only the topmost function has been patched). + +.. _assembly: + +Emitting assembly code: ``GCMetadataPrinter`` +--------------------------------------------- + +LLVM allows a plugin to print arbitrary assembly code before and after the rest +of a module's assembly code. At the end of the module, the GC can compile the +LLVM stack map into assembly code. (At the beginning, this information is not +yet computed.) + +Since AsmWriter and CodeGen are separate components of LLVM, a separate abstract +base class and registry is provided for printing assembly code, the +``GCMetadataPrinter`` and ``GCMetadataPrinterRegistry``. The AsmWriter will look +for such a subclass if the ``GCStrategy`` sets ``UsesMetadata``: + +.. code-block:: c++ + + MyGC::MyGC() { + UsesMetadata = true; + } + +This separation allows JIT-only clients to be smaller. + +Note that LLVM does not currently have analogous APIs to support code generation +in the JIT, nor using the object writers. + +..
code-block:: c++ + + // lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer + + #include "llvm/CodeGen/GCMetadataPrinter.h" + #include "llvm/Support/Compiler.h" + + using namespace llvm; + + namespace { + class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter { + public: + virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP, + const TargetAsmInfo &TAI); + + virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP, + const TargetAsmInfo &TAI); + }; + + GCMetadataPrinterRegistry::Add + X("mygc", "My bespoke garbage collector."); + } + +The collector should use ``AsmPrinter`` and ``TargetAsmInfo`` to print portable +assembly code to the ``std::ostream``. The collector itself contains the stack +map for the entire module, and may access the ``GCFunctionInfo`` using its own +``begin()`` and ``end()`` methods. Here's a realistic example: + +.. code-block:: c++ + + #include "llvm/CodeGen/AsmPrinter.h" + #include "llvm/Function.h" + #include "llvm/Target/TargetMachine.h" + #include "llvm/DataLayout.h" + #include "llvm/Target/TargetAsmInfo.h" + + void MyGCPrinter::beginAssembly(std::ostream &OS, AsmPrinter &AP, + const TargetAsmInfo &TAI) { + // Nothing to do. + } + + void MyGCPrinter::finishAssembly(std::ostream &OS, AsmPrinter &AP, + const TargetAsmInfo &TAI) { + // Set up for emitting addresses. + const char *AddressDirective; + int AddressAlignLog; + if (AP.TM.getDataLayout()->getPointerSize() == sizeof(int32_t)) { + AddressDirective = TAI.getData32bitsDirective(); + AddressAlignLog = 2; + } else { + AddressDirective = TAI.getData64bitsDirective(); + AddressAlignLog = 3; + } + + // Put this in the data section. + AP.SwitchToDataSection(TAI.getDataSection()); + + // For each function... 
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) { + GCFunctionInfo &MD = **FI; + + // Emit this data structure: + // + // struct { + // int32_t PointCount; + // struct { + // void *SafePointAddress; + // int32_t LiveCount; + // int32_t LiveOffsets[LiveCount]; + // } Points[PointCount]; + // } __gcmap_; + + // Align to address width. + AP.EmitAlignment(AddressAlignLog); + + // Emit the symbol by which the stack map entry can be found. + std::string Symbol; + Symbol += TAI.getGlobalPrefix(); + Symbol += "__gcmap_"; + Symbol += MD.getFunction().getName(); + if (const char *GlobalDirective = TAI.getGlobalDirective()) + OS << GlobalDirective << Symbol << "\n"; + OS << TAI.getGlobalPrefix() << Symbol << ":\n"; + + // Emit PointCount. + AP.EmitInt32(MD.size()); + AP.EOL("safe point count"); + + // And each safe point... + for (GCFunctionInfo::iterator PI = MD.begin(), + PE = MD.end(); PI != PE; ++PI) { + // Align to address width. + AP.EmitAlignment(AddressAlignLog); + + // Emit the address of the safe point. + OS << AddressDirective + << TAI.getPrivateGlobalPrefix() << "label" << PI->Num; + AP.EOL("safe point address"); + + // Emit the stack frame size. + AP.EmitInt32(MD.getFrameSize()); + AP.EOL("stack frame size"); + + // Emit the number of live roots in the function. + AP.EmitInt32(MD.live_size(PI)); + AP.EOL("live root count"); + + // And for each live root... + for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), + LE = MD.live_end(PI); + LI != LE; ++LI) { + // Print its offset within the stack frame. + AP.EmitInt32(LI->StackOffset); + AP.EOL("stack offset"); + } + } + } + } + +References +========== + +.. _appel89: + +[Appel89] Runtime Tags Aren't Necessary. Andrew W. Appel. Lisp and Symbolic +Computation 19(7):703-705, July 1989. + +.. _goldberg91: + +[Goldberg91] Tag-free garbage collection for strongly typed programming +languages. Benjamin Goldberg. ACM SIGPLAN PLDI'91. + +.. 
_tolmach94: + +[Tolmach94] Tag-free garbage collection using explicit type parameters. Andrew +Tolmach. Proceedings of the 1994 ACM conference on LISP and functional +programming. + +.. _henderson02: + +[Henderson2002] `Accurate Garbage Collection in an Uncooperative Environment +`__ + diff --git a/docs/GetElementPtr.rst b/docs/GetElementPtr.rst index f6f904b2e35e..306a2a87effd 100644 --- a/docs/GetElementPtr.rst +++ b/docs/GetElementPtr.rst @@ -1,5 +1,3 @@ -.. _gep: - ======================================= The Often Misunderstood GEP Instruction ======================================= @@ -22,7 +20,7 @@ Address Computation When people are first confronted with the GEP instruction, they tend to relate it to known concepts from other programming paradigms, most notably C array indexing and field selection. GEP closely resembles C array indexing and field -selection, however it's is a little different and this leads to the following +selection, however it is a little different and this leads to the following questions. What is the first index of the GEP instruction? @@ -190,7 +188,7 @@ In this example, we have a global variable, ``%MyVar`` that is a pointer to a structure containing a pointer to an array of 40 ints. The GEP instruction seems to be accessing the 18th integer of the structure's array of ints. However, this is actually an illegal GEP instruction. It won't compile. The reason is that the -pointer in the structure must be dereferenced in order to index into the +pointer in the structure *must* be dereferenced in order to index into the array of 40 ints. Since the GEP instruction never accesses memory, it is illegal. @@ -416,7 +414,7 @@ arithmetic, and inttoptr sequences. Can I compute the distance between two objects, and add that value to one address to compute the other address? 
--------------------------------------------------------------------------------------------------------------- -As with arithmetic on null, You can use GEP to compute an address that way, but +As with arithmetic on null, you can use GEP to compute an address that way, but you can't use that pointer to actually access the object if you do, unless the object is managed outside of LLVM. diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index 68768921f6ae..0bbbafc6e690 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -1,9 +1,10 @@ -.. _getting_started: - ==================================== Getting Started with the LLVM System ==================================== +.. contents:: + :local: + Overview ======== @@ -68,33 +69,30 @@ Here's the short story for getting up and running quickly with LLVM: * ``../llvm/configure [options]`` Some common options: - * ``--prefix=directory`` --- + * ``--prefix=directory`` --- Specify for *directory* the full pathname of + where you want the LLVM tools and libraries to be installed (default + ``/usr/local``). - Specify for *directory* the full pathname of where you want the LLVM - tools and libraries to be installed (default ``/usr/local``). + * ``--enable-optimized`` --- Compile with optimizations enabled (default + is NO). - * ``--enable-optimized`` --- - - Compile with optimizations enabled (default is NO). - - * ``--enable-assertions`` --- - - Compile with assertion checks enabled (default is YES). + * ``--enable-assertions`` --- Compile with assertion checks enabled + (default is YES). * ``make [-j]`` --- The ``-j`` specifies the number of jobs (commands) to run simultaneously. This builds both LLVM and Clang for Debug+Asserts mode. - The --enabled-optimized configure option is used to specify a Release + The ``--enable-optimized`` configure option is used to specify a Release build. * ``make check-all`` --- This run the regression tests to ensure everything is in working order.
- + * ``make update`` --- This command is used to update all the svn repositories at once, rather then having to ``cd`` into the individual repositories and running ``svn update``. * It is also possible to use CMake instead of the makefiles. With CMake it is - also possible to generate project files for several IDEs: Eclipse CDT4, + possible to generate project files for several IDEs: Xcode, Eclipse CDT4, CodeBlocks, Qt-Creator (use the CodeBlocks generator), KDevelop3. * If you get an "internal compiler error (ICE)" or test failures, see @@ -126,6 +124,8 @@ LLVM is known to work on the following platforms: +-----------------+----------------------+-------------------------+ |Linux | amd64 | GCC | +-----------------+----------------------+-------------------------+ +|Linux | ARM\ :sup:`13` | GCC | ++-----------------+----------------------+-------------------------+ |Solaris | V9 (Ultrasparc) | GCC | +-----------------+----------------------+-------------------------+ |FreeBSD | x86\ :sup:`1` | GCC | @@ -161,8 +161,6 @@ LLVM has partial support for the following platforms: .. note:: - Code generation supported for Pentium processors and up - #. Code generation supported for Pentium processors and up #. Code generation supported for 32-bit ABI only #. No native code generation @@ -182,9 +180,9 @@ LLVM has partial support for the following platforms: Windows-specifics that will cause the build to fail. #. To use LLVM modules on Win32-based system, you may configure LLVM with ``--enable-shared``. - #. To compile SPU backend, you need to add ``LDFLAGS=-Wl,--stack,16777216`` to configure. + #. MCJIT not working well pre-v7, old JIT engine not supported any more. Note that you will need about 1-3 GB of space for a full LLVM build in Debug mode, depending on the system (it is so large because of all the debugging @@ -219,11 +217,7 @@ uses the package and provides other details. 
+--------------------------------------------------------------+-----------------+---------------------------------------------+ | `SVN `_ | >=1.3 | Subversion access to LLVM\ :sup:`2` | +--------------------------------------------------------------+-----------------+---------------------------------------------+ -| `DejaGnu `_ | 1.4.2 | Automated test suite\ :sup:`3` | -+--------------------------------------------------------------+-----------------+---------------------------------------------+ -| `tcl `_ | 8.3, 8.4 | Automated test suite\ :sup:`3` | -+--------------------------------------------------------------+-----------------+---------------------------------------------+ -| `expect `_ | 5.38.0 | Automated test suite\ :sup:`3` | +| `python `_ | >=2.4 | Automated test suite\ :sup:`3` | +--------------------------------------------------------------+-----------------+---------------------------------------------+ | `perl `_ | >=5.6.0 | Utilities | +--------------------------------------------------------------+-----------------+---------------------------------------------+ @@ -368,6 +362,9 @@ optimizations are turned on. The symptom is an infinite loop in ``-O0``. A test failure in ``test/Assembler/alignstack.ll`` is one symptom of the problem. +**GCC 4.6.3 on ARM**: Miscompiles ``llvm-readobj`` at ``-O3``. A test failure +in ``test/Object/readobj-shared-object.test`` is one symptom of the problem. + **GNU ld 2.16.X**. Some 2.16.X versions of the ld linker will produce very long warning messages complaining that some "``.gnu.linkonce.t.*``" symbol was defined in a discarded section. You can safely ignore these messages as they are @@ -384,6 +381,14 @@ intermittent failures when building LLVM with position independent code. The symptom is an error about cyclic dependencies. We recommend upgrading to a newer version of Gold. 
+**Clang 3.0 with libstdc++ 4.7.x**: a few Linux distributions (Ubuntu 12.10, +Fedora 17) have both Clang 3.0 and libstdc++ 4.7 in their repositories. Clang +3.0 does not implement a few builtins that are used in this library. We +recommend using the system GCC to compile LLVM and Clang in this case. + +**Clang 3.0 on Mageia 2**. There's a packaging issue: Clang can not find at +least some (``cxxabi.h``) libstdc++ headers. + .. _Getting Started with LLVM: Getting Started with LLVM @@ -459,6 +464,8 @@ The files are as follows, with *x.y* marking the version number: Binary release of the llvm-gcc-4.2 front end for a specific platform. +.. _checkout: + Checkout LLVM from Subversion ----------------------------- @@ -505,7 +512,7 @@ directory: If you would like to get the LLVM test suite (a separate package as of 1.4), you get it from the Subversion repository: -.. code-block:: bash +.. code-block:: console % cd llvm/projects % svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite @@ -514,33 +521,46 @@ By placing it in the ``llvm/projects``, it will be automatically configured by the LLVM configure script as well as automatically updated when you run ``svn update``. -GIT mirror +Git Mirror ---------- -GIT mirrors are available for a number of LLVM subprojects. These mirrors sync +Git mirrors are available for a number of LLVM subprojects. These mirrors sync automatically with each Subversion commit and contain all necessary git-svn marks (so, you can recreate git-svn metadata locally). Note that right now -mirrors reflect only ``trunk`` for each project. You can do the read-only GIT +mirrors reflect only ``trunk`` for each project. You can do the read-only Git clone of LLVM via: -.. code-block:: bash +.. code-block:: console % git clone http://llvm.org/git/llvm.git If you want to check out clang too, run: -.. code-block:: bash +.. 
code-block:: console - % git clone http://llvm.org/git/llvm.git % cd llvm/tools % git clone http://llvm.org/git/clang.git +If you want to check out compiler-rt too, run: + +.. code-block:: console + + % cd llvm/projects + % git clone http://llvm.org/git/compiler-rt.git + +If you want to check out the Test Suite Source Code (optional), run: + +.. code-block:: console + + % cd llvm/projects + % git clone http://llvm.org/git/test-suite.git + Since the upstream repository is in Subversion, you should use ``git pull --rebase`` instead of ``git pull`` to avoid generating a non-linear history in your clone. To configure ``git pull`` to pass ``--rebase`` by default on the master branch, run the following command: -.. code-block:: bash +.. code-block:: console % git config branch.master.rebase true @@ -553,13 +573,13 @@ Assume ``master`` points the upstream and ``mybranch`` points your working branch, and ``mybranch`` is rebased onto ``master``. At first you may check sanity of whitespaces: -.. code-block:: bash +.. code-block:: console % git diff --check master..mybranch The easiest way to generate a patch is as below: -.. code-block:: bash +.. code-block:: console % git diff master..mybranch > /path/to/mybranch.diff @@ -570,20 +590,20 @@ could be accepted with ``patch -p1 -N``. But you may generate patchset with git-format-patch. It generates by-each-commit patchset. To generate patch files to attach to your article: -.. code-block:: bash +.. code-block:: console % git format-patch --no-attach master..mybranch -o /path/to/your/patchset If you would like to send patches directly, you may use git-send-email or git-imap-send. Here is an example to generate the patchset in Gmail's [Drafts]. -.. code-block:: bash +.. code-block:: console % git format-patch --attach master..mybranch --stdout | git imap-send Then, your .git/config should have [imap] sections. -.. code-block:: bash +.. 
code-block:: ini [imap] host = imaps://imap.gmail.com @@ -603,7 +623,7 @@ For developers to work with git-svn To set up clone from which you can submit code using ``git-svn``, run: -.. code-block:: bash +.. code-block:: console % git clone http://llvm.org/git/llvm.git % cd llvm @@ -619,10 +639,12 @@ To set up clone from which you can submit code using ``git-svn``, run: % git config svn-remote.svn.fetch :refs/remotes/origin/master % git svn rebase -l -To update this clone without generating git-svn tags that conflict with the -upstream git repo, run: +Likewise for compiler-rt and test-suite. -.. code-block:: bash +To update this clone without generating git-svn tags that conflict with the +upstream Git repo, run: + +.. code-block:: console % git fetch && (cd tools/clang && git fetch) # Get matching revisions of both trees. % git checkout master @@ -631,20 +653,65 @@ upstream git repo, run: git checkout master && git svn rebase -l) +Likewise for compiler-rt and test-suite. + This leaves your working directories on their master branches, so you'll need to ``checkout`` each working branch individually and ``rebase`` it on top of its -parent branch. (Note: This script is intended for relative newbies to git. If -you have more experience, you can likely improve on it.) +parent branch. + +For those who wish to be able to update an llvm repo in a simpler fashion, +consider placing the following Git script in your path under the name +``git-svnup``: + +.. code-block:: bash + + #!/bin/bash + + STATUS=$(git status -s | grep -v "??") + + if [ ! -z "$STATUS" ]; then + STASH="yes" + git stash >/dev/null + fi + + git fetch + OLD_BRANCH=$(git rev-parse --abbrev-ref HEAD) + git checkout master 2> /dev/null + git svn rebase -l + git checkout $OLD_BRANCH 2> /dev/null + + if [ ! -z $STASH ]; then + git stash pop >/dev/null + fi + +Then to perform the aforementioned update steps go into your source directory +and just type ``git-svnup`` or ``git svnup`` and everything will just work. 
+ +To commit back changes via git-svn, use ``dcommit``: + +.. code-block:: console + + % git svn dcommit + +Note that git-svn will create one SVN commit for each Git commit you have pending, +so squash and edit each commit before executing ``dcommit`` to make sure they all +conform to the coding standards and the developers' policy. + +On success, ``dcommit`` will rebase against the HEAD of SVN, so to avoid conflict, +please make sure your current branch is up-to-date (via fetch/rebase) before +proceeding. The git-svn metadata can get out of sync after you mess around with branches and ``dcommit``. When that happens, ``git svn dcommit`` stops working, complaining about files with uncommitted changes. The fix is to rebuild the metadata: -.. code-block:: bash +.. code-block:: console % rm -rf .git/svn % git svn rebase -l +Please, refer to the Git-SVN manual (``man git-svn``) for more information. + Local LLVM Configuration ------------------------ @@ -661,14 +728,15 @@ configure the build system: | Variable | Purpose | +============+===========================================================+ | CC | Tells ``configure`` which C compiler to use. By default, | -| | ``configure`` will look for the first GCC C compiler in | -| | ``PATH``. Use this variable to override ``configure``\'s | -| | default behavior. | +| | ``configure`` will check ``PATH`` for ``clang`` and GCC C | +| | compilers (in this order). Use this variable to override | +| | ``configure``\'s default behavior. | +------------+-----------------------------------------------------------+ | CXX | Tells ``configure`` which C++ compiler to use. By | -| | default, ``configure`` will look for the first GCC C++ | -| | compiler in ``PATH``. Use this variable to override | -| | ``configure``'s default behavior. | +| | default, ``configure`` will check ``PATH`` for | +| | ``clang++`` and GCC C++ compilers (in this order). Use | +| | this variable to override ``configure``'s default | +| | behavior. 
| +------------+-----------------------------------------------------------+ The following options can be used to set or enable LLVM specific options: @@ -722,13 +790,13 @@ To configure LLVM, follow these steps: #. Change directory into the object root directory: - .. code-block:: bash + .. code-block:: console % cd OBJ_ROOT #. Run the ``configure`` script located in the LLVM source tree: - .. code-block:: bash + .. code-block:: console % SRC_ROOT/configure --prefix=/install/path [other options] @@ -764,7 +832,7 @@ Profile Builds Once you have LLVM configured, you can build it by entering the *OBJ_ROOT* directory and issuing the following command: -.. code-block:: bash +.. code-block:: console % gmake @@ -775,7 +843,7 @@ If you have multiple processors in your machine, you may wish to use some of the parallel build options provided by GNU Make. For example, you could use the command: -.. code-block:: bash +.. code-block:: console % gmake -j2 @@ -842,12 +910,39 @@ any subdirectories that it contains. Entering any directory inside the LLVM object tree and typing ``gmake`` should rebuild anything in or below that directory that is out of date. +This does not apply to building the documentation. +LLVM's (non-Doxygen) documentation is produced with the +`Sphinx `_ documentation generation system. +There are some HTML documents that have not yet been converted to the new +system (which uses the easy-to-read and easy-to-write +`reStructuredText `_ plaintext markup +language). +The generated documentation is built in the ``SRC_ROOT/docs`` directory using +a special makefile. +For instructions on how to install Sphinx, see +`Sphinx Introduction for LLVM Developers +`_. +After following the instructions there for installing Sphinx, build the LLVM +HTML documentation by doing the following: + +.. code-block:: console + + $ cd SRC_ROOT/docs + $ make -f Makefile.sphinx + +This creates a ``_build/html`` sub-directory with all of the HTML files, not +just the generated ones. 
+This directory corresponds to ``llvm.org/docs``. +For example, ``_build/html/SphinxQuickstartTemplate.html`` corresponds to +``llvm.org/docs/SphinxQuickstartTemplate.html``. +The :doc:`SphinxQuickstartTemplate` is useful when creating a new document. + Cross-Compiling LLVM -------------------- It is possible to cross-compile LLVM itself. That is, you can create LLVM executables and libraries to be hosted on a platform different from the platform -where they are build (a Canadian Cross build). To configure a cross-compile, +where they are built (a Canadian Cross build). To configure a cross-compile, supply the configure script with ``--build`` and ``--host`` options that are different. The values of these options must be legal target triples that your GCC compiler supports. @@ -866,13 +961,13 @@ This is accomplished in the typical autoconf manner: * Change directory to where the LLVM object files should live: - .. code-block:: bash + .. code-block:: console % cd OBJ_ROOT * Run the ``configure`` script found in the LLVM source directory: - .. code-block:: bash + .. code-block:: console % SRC_ROOT/configure @@ -913,12 +1008,12 @@ Optional Configuration Items ---------------------------- If you're running on a Linux system that supports the `binfmt_misc -`_ +`_ module, and you have root access on the system, you can set your system up to execute LLVM bitcode files directly. To do this, use commands like this (the first command may not be required if you are already using the module): -.. code-block:: bash +.. code-block:: console % mount -t binfmt_misc none /proc/sys/fs/binfmt_misc % echo ':llvm:M::BC::/path/to/lli:' > /proc/sys/fs/binfmt_misc/register @@ -928,7 +1023,7 @@ first command may not be required if you are already using the module): This allows you to execute LLVM bitcode files directly. On Debian, you can also use this command instead of the 'echo' command above: -.. code-block:: bash +.. 
code-block:: console % sudo update-binfmts --install llvm /path/to/lli --magic 'BC' @@ -1073,8 +1168,8 @@ module that must be checked out (usually to ``projects/test-suite``). This module contains a comprehensive correctness, performance, and benchmarking test suite for LLVM. It is a separate Subversion module because not every LLVM user is interested in downloading or building such a comprehensive test suite. For -further details on this test suite, please see the `Testing -Guide `_ document. +further details on this test suite, please see the :doc:`Testing Guide +` document. .. _tools: @@ -1219,7 +1314,7 @@ Example with clang #. Next, compile the C file into a native executable: - .. code-block:: bash + .. code-block:: console % clang hello.c -o hello @@ -1230,7 +1325,7 @@ Example with clang #. Next, compile the C file into a LLVM bitcode file: - .. code-block:: bash + .. code-block:: console % clang -O3 -emit-llvm hello.c -c -o hello.bc @@ -1240,42 +1335,42 @@ Example with clang #. Run the program in both forms. To run the program, use: - .. code-block:: bash + .. code-block:: console % ./hello and - .. code-block:: bash + .. code-block:: console % lli hello.bc - The second examples shows how to invoke the LLVM JIT, `lli - `_. + The second examples shows how to invoke the LLVM JIT, :doc:`lli + `. #. Use the ``llvm-dis`` utility to take a look at the LLVM assembly code: - .. code-block:: bash + .. code-block:: console % llvm-dis < hello.bc | less #. Compile the program to native assembly using the LLC code generator: - .. code-block:: bash + .. code-block:: console % llc hello.bc -o hello.s #. Assemble the native assembly language file into a program: - .. code-block:: bash + .. code-block:: console - **Solaris:** % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native + % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native # On Solaris - **Others:** % gcc hello.s -o hello.native + % gcc hello.s -o hello.native # On others #. 
Execute the native code program: - .. code-block:: bash + .. code-block:: console % ./hello.native diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst index 35f97f04b9d5..4c80f2c57bfa 100644 --- a/docs/GettingStartedVS.rst +++ b/docs/GettingStartedVS.rst @@ -1,5 +1,3 @@ -.. _winvs: - ================================================================== Getting Started with the LLVM System using Microsoft Visual Studio ================================================================== diff --git a/docs/GoldPlugin.rst b/docs/GoldPlugin.rst index 300aea9f9a49..17bbeb8ba9f8 100644 --- a/docs/GoldPlugin.rst +++ b/docs/GoldPlugin.rst @@ -1,11 +1,7 @@ -.. _gold-plugin: - ==================== The LLVM gold plugin ==================== -.. sectionauthor:: Nick Lewycky - Introduction ============ diff --git a/docs/HowToAddABuilder.rst b/docs/HowToAddABuilder.rst index b0cd2907f975..893f12d19d55 100644 --- a/docs/HowToAddABuilder.rst +++ b/docs/HowToAddABuilder.rst @@ -1,11 +1,7 @@ -.. _how_to_add_a_builder: - =================================================================== How To Add Your Build Configuration To LLVM Buildbot Infrastructure =================================================================== -.. sectionauthor:: Galina Kistanova - Introduction ============ diff --git a/docs/HowToBuildOnARM.rst b/docs/HowToBuildOnARM.rst index d786a7dedaf4..32ae39ba6883 100644 --- a/docs/HowToBuildOnARM.rst +++ b/docs/HowToBuildOnARM.rst @@ -1,11 +1,7 @@ -.. _how_to_build_on_arm: - =================================================================== How To Build On ARM =================================================================== -.. sectionauthor:: Wei-Ren Chen (陳韋任) - Introduction ============ @@ -40,8 +36,8 @@ on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips. .. 
code-block:: bash - ./configure --build=armv7l-unknown-linux-gnueabihf - --host=armv7l-unknown-linux-gnueabihf - --target=armv7l-unknown-linux-gnueabihf --with-cpu=cortex-a9 - --with-float=hard --with-abi=aapcs-vfp --with-fpu=neon - --enable-targets=arm --disable-optimized --enable-assertions + ./configure --build=armv7l-unknown-linux-gnueabihf \ + --host=armv7l-unknown-linux-gnueabihf \ + --target=armv7l-unknown-linux-gnueabihf --with-cpu=cortex-a9 \ + --with-float=hard --with-abi=aapcs-vfp --with-fpu=neon \ + --enable-targets=arm --enable-optimized --enable-assertions diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html deleted file mode 100644 index 6fdec2cfee79..000000000000 --- a/docs/HowToReleaseLLVM.html +++ /dev/null @@ -1,581 +0,0 @@ - - - - - How To Release LLVM To The Public - - - - -

How To Release LLVM To The Public

-
    -
  1. Introduction
  2. -
  3. Qualification Criteria
  4. -
  5. Release Timeline
  6. -
  7. Release Process
  8. -
- - - -

Introduction

- - -
- -

This document contains information about successfully releasing LLVM — - including subprojects: e.g., clang and dragonegg — to - the public. It is the Release Manager's responsibility to ensure that a high - quality build of LLVM is released.

- -
- - -

Release Timeline

- -
- -

LLVM is released on a time based schedule — roughly every 6 months. We - do not normally have dot releases because of the nature of LLVM's incremental - development philosophy. That said, the only thing preventing dot releases for - critical bug fixes from happening is a lack of resources — testers, - machines, time, etc. And, because of the high quality we desire for LLVM - releases, we cannot allow for a truncated form of release qualification.

- -

The release process is roughly as follows:

- -
    -
  • Set code freeze and branch creation date for 6 months after last code - freeze date. Announce release schedule to the LLVM community and update - the website.

  • - -
  • Create release branch and begin release process.

  • - -
  • Send out release candidate sources for first round of testing. Testing - lasts 7-10 days. During the first round of testing, any regressions found - should be fixed. Patches are merged from mainline into the release - branch. Also, all features need to be completed during this time. Any - features not completed at the end of the first round of testing will be - removed or disabled for the release.

  • - -
  • Generate and send out the second release candidate sources. Only - critical bugs found during this testing phase will be fixed. Any - bugs introduced by merged patches will be fixed. If so, a third round of - testing is needed.

  • - -
  • The release notes are updated.

  • - -
  • Finally, release!

  • -
- -
- - -

Release Process

- - -
- -
    -
  1. Release Administrative Tasks -
      -
    1. Create Release Branch
    2. -
    3. Update Version Numbers
    4. -
    -
  2. -
  3. Building the Release -
      -
    1. Build the LLVM Source Distributions
    2. -
    3. Build LLVM
    4. -
    5. Build the Clang Binary Distribution
    6. -
    7. Target Specific Build Details
    8. -
    -
  4. -
  5. Release Qualification Criteria -
      -
    1. Qualify LLVM
    2. -
    3. Qualify Clang
    4. -
    5. Specific Target Qualification Details
    6. -
    -
  6. - -
  7. Community Testing
  8. -
  9. Release Patch Rules
  10. -
  11. Release final tasks -
      -
    1. Update Documentation
    2. -
    3. Tag the LLVM Final Release
    4. -
    5. Update the LLVM Demo Page
    6. -
    7. Update the LLVM Website
    8. -
    9. Announce the Release
    10. -
    -
  12. -
- - -

Release Administrative Tasks

- -
- -

This section describes a few administrative tasks that need to be done for - the release process to begin. Specifically, it involves:

- -
    -
  • Creating the release branch,
  • -
  • Setting version numbers, and
  • -
  • Tagging release candidates for the release team to begin testing
  • -
- - -

Create Release Branch

- -
- -

Branch the Subversion trunk using the following procedure:

- -
    -
  1. Remind developers that the release branching is imminent and to refrain - from committing patches that might break the build. E.g., new features, - large patches for works in progress, an overhaul of the type system, an - exciting new TableGen feature, etc.

  2. - -
  3. Verify that the current Subversion trunk is in decent shape by - examining nightly tester and buildbot results.

  4. - -
  5. Create the release branch for llvm, clang, - the test-suite, and dragonegg from the last known good - revision. The branch's name is release_XY, - where X is the major and Y the minor release - numbers. The branches should be created using the following commands:

    - -
    -
    -$ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
    -           https://llvm.org/svn/llvm-project/llvm/branches/release_XY
    -
    -$ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
    -           https://llvm.org/svn/llvm-project/cfe/branches/release_XY
    -
    -$ svn copy https://llvm.org/svn/llvm-project/dragonegg/trunk \
    -           https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY
    -
    -$ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
    -           https://llvm.org/svn/llvm-project/test-suite/branches/release_XY
    -
    -
  6. - -
  7. Advise developers that they may now check their patches into the - Subversion tree again.

  8. - -
  9. The Release Manager should switch to the release branch, because all - changes to the release will now be done in the branch. The easiest way to - do this is to grab a working copy using the following commands:

    - -
    -
    -$ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XY llvm-X.Y
    -
    -$ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XY clang-X.Y
    -
    -$ svn co https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY dragonegg-X.Y
    -
    -$ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XY test-suite-X.Y
    -
    -
  10. -
- -
- - -

Update LLVM Version

- -
- -

After creating the LLVM release branch, update the release branches' - autoconf and configure.ac versions from 'X.Ysvn' - to 'X.Y'. Update it on mainline as well to be the next version - ('X.Y+1svn'). Regenerate the configure scripts for both - llvm and the test-suite.

- -

In addition, the version numbers of all the Bugzilla components must be - updated for the next release.

- -
- - -

Build the LLVM Release Candidates

- -
- -

Create release candidates for llvm, clang, - dragonegg, and the LLVM test-suite by tagging the branch - with the respective release candidate number. For instance, to - create Release Candidate 1 you would issue the following commands:

- -
-
-$ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY
-$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
-           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY
-$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
-           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
-           https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
-           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1
-
-
- -

Similarly, Release Candidate 2 would be named RC2 and so - on. This keeps a permanent copy of the release candidate around for people to - export and build as they wish. The final released sources will be tagged in - the RELEASE_XY directory as Final - (see Tag the LLVM Final Release).

- -

The Release Manager may supply pre-packaged source tarballs for users. This - can be done with the following commands:

- -
-
-$ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1 llvm-X.Yrc1
-$ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1 clang-X.Yrc1
-$ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1 dragonegg-X.Yrc1
-$ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1 llvm-test-X.Yrc1
-
-$ tar -cvf - llvm-X.Yrc1        | gzip > llvm-X.Yrc1.src.tar.gz
-$ tar -cvf - clang-X.Yrc1       | gzip > clang-X.Yrc1.src.tar.gz
-$ tar -cvf - dragonegg-X.Yrc1   | gzip > dragonegg-X.Yrc1.src.tar.gz
-$ tar -cvf - llvm-test-X.Yrc1   | gzip > llvm-test-X.Yrc1.src.tar.gz
-
-
- -
- -
- - -

Building the Release

- -
- -

The builds of llvm, clang, and dragonegg - must be free of errors and warnings in Debug, Release+Asserts, and - Release builds. If all builds are clean, then the release passes Build - Qualification.

- -

The make options for building the different modes:

- - - - - - -
ModeOptions
DebugENABLE_OPTIMIZED=0
Release+AssertsENABLE_OPTIMIZED=1
ReleaseENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
- - -

Build LLVM

- -
- -

Build Debug, Release+Asserts, and Release versions - of llvm on all supported platforms. Directions to build - llvm are here.

- -
- - -

Build Clang Binary Distribution

- -
- -

Creating the clang binary distribution - (Debug/Release+Asserts/Release) requires performing the following steps for - each supported platform:

- -
    -
  1. Build clang according to the directions - here.
  2. - -
  3. Build both a Debug and Release version of clang. The binary will be the - Release build.
  4. - -
  5. Package clang (details to follow).
  6. -
- -
- - -

Target Specific Build Details

- -
- -

The table below specifies which compilers are used for each Arch/OS - combination when qualifying the build of llvm, clang, - and dragonegg.

- - - - - - - - - - -
Architecture OS compiler
x86-32 Mac OS 10.5 gcc 4.0.1
x86-32 Linux gcc 4.2.X, gcc 4.3.X
x86-32 FreeBSD gcc 4.2.X
x86-32 mingw gcc 3.4.5
x86-64 Mac OS 10.5 gcc 4.0.1
x86-64 Linux gcc 4.2.X, gcc 4.3.X
x86-64 FreeBSD gcc 4.2.X
- -
- -
- - -

Release Qualification Criteria

- -
- -

A release is qualified when it has no regressions from the previous release - (or baseline). Regressions are related to correctness first and performance - second. (We may tolerate some minor performance regressions if they are - deemed necessary for the general quality of the compiler.)

- -

Regressions are new failures in the set of tests that are used to qualify - each product and only include things on the list. Every release will have - some bugs in it. It is the reality of developing a complex piece of - software. We need very concrete and definitive release criteria that - ensure we have monotonically improving quality on some metric. The metric we - use is described below. This doesn't mean that we don't care about other - criteria, but these are the criteria which we found to be most important and - which must be satisfied before a release can go out.

- - -

Qualify LLVM

- -
- -

LLVM is qualified when it has a clean test run without a front-end. And it - has no regressions when using either clang or dragonegg - with the test-suite from the previous release.

- -
- - -

Qualify Clang

- -
- -

Clang is qualified when front-end specific tests in the - llvm dejagnu test suite all pass, clang's own test suite passes - cleanly, and there are no regressions in the test-suite.

- -
- - -

Specific Target Qualification Details

- -
- - - - - - - - - -
Architecture OS clang baseline tests
x86-32 Linux last release llvm dejagnu, clang tests, test-suite (including spec)
x86-32 FreeBSD last release llvm dejagnu, clang tests, test-suite
x86-32 mingw none QT
x86-64 Mac OS 10.X last release llvm dejagnu, clang tests, test-suite (including spec)
x86-64 Linux last release llvm dejagnu, clang tests, test-suite (including spec)
x86-64 FreeBSD last release llvm dejagnu, clang tests, test-suite
- -
- -
- - -

Community Testing

-
- -

Once all testing has been completed and appropriate bugs filed, the release - candidate tarballs are put on the website and the LLVM community is - notified. Ask that all LLVM developers test the release in 2 ways:

- -
    -
  1. Download llvm-X.Y, llvm-test-X.Y, and the - appropriate clang binary. Build LLVM. Run make check and - the full LLVM test suite (make TEST=nightly report).
  2. - -
  3. Download llvm-X.Y, llvm-test-X.Y, and the - clang sources. Compile everything. Run make check and - the full LLVM test suite (make TEST=nightly report).
  4. -
- -

Ask LLVM developers to submit the test suite report and make check - results to the list. Verify that there are no regressions from the previous - release. The results are not used to qualify a release, but to spot other - potential problems. For unsupported targets, verify that make check - is at least clean.

- -

During the first round of testing, all regressions must be fixed before the - second release candidate is tagged.

- -

If this is the second round of testing, the testing is only to ensure that - bug fixes previously merged in have not created new major problems. This - is not the time to solve additional and unrelated bugs! If no patches are - merged in, the release is determined to be ready and the release manager may - move on to the next stage.

- -
- - -

Release Patch Rules

- -
- -

Below are the rules regarding patching the release branch:

- -
    -
  1. Patches applied to the release branch may only be applied by the - release manager.

  2. - -
  3. During the first round of testing, patches that fix regressions or that - are small and relatively risk free (verified by the appropriate code - owner) are applied to the branch. Code owners are asked to be very - conservative in approving patches for the branch. We reserve the right to - reject any patch that does not fix a regression as previously - defined.

  4. - -
  5. During the remaining rounds of testing, only patches that fix critical - regressions may be applied.

  6. -
- -
- - -

Release Final Tasks

- -
- -

The final stages of the release process involve tagging the "final" release - branch, updating documentation that refers to the release, and updating the - demo page.

- - -

Update Documentation

- -
- -

Review the documentation and ensure that it is up to date. The "Release - Notes" must be updated to reflect new features, bug fixes, new known issues, - and changes in the list of supported platforms. The "Getting Started Guide" - should be updated to reflect the new release version number tag available from - Subversion and changes in basic system requirements. Merge both changes from - mainline into the release branch.

- -
- - -

Tag the LLVM Final Release

- -
- -

Tag the final release sources using the following procedure:

- -
-
-$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
-           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
-           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
-           https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
-           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/Final
-
-
- -
- -
- - -

Update the LLVM Demo Page

- -
- -

The LLVM demo page must be updated to use the new release. This consists of - using the new clang binary and building LLVM.

- - -

Update the LLVM Website

- -
- -

The website must be updated before the release announcement is sent out. Here - is what to do:

- -
    -
  1. Check out the www module from Subversion.
  2. - -
  3. Create a new subdirectory X.Y in the releases directory.
  4. - -
  5. Commit the llvm, test-suite, clang source, - clang binaries, dragonegg source, and dragonegg - binaries in this new directory.
  6. - -
  7. Copy and commit the llvm/docs and LICENSE.txt files - into this new directory. The docs should be built with - BUILD_FOR_WEBSITE=1.
  8. - -
  9. Commit the index.html to the releases/X.Y directory to - redirect (use the one from the previous release).
  10. - -
  11. Update the releases/download.html file with the new release.
  12. - -
  13. Update the releases/index.html with the new release and link to - release documentation.
  14. - -
  15. Finally, update the main page (index.html and sidebar) to point - to the new release and release announcement. Make sure this all gets - committed back into Subversion.
  16. -
- -
- - -

Announce the Release

- -
- -

Have Chris send out the release announcement when everything is finished.

- -
- -
- -
- - -
-
- Valid CSS - Valid HTML 4.01 - The LLVM Compiler Infrastructure -
- Last modified: $Date: 2012-07-31 09:05:57 +0200 (Tue, 31 Jul 2012) $ -
- - diff --git a/docs/HowToReleaseLLVM.rst b/docs/HowToReleaseLLVM.rst new file mode 100644 index 000000000000..31877bd35ac8 --- /dev/null +++ b/docs/HowToReleaseLLVM.rst @@ -0,0 +1,422 @@ +================================= +How To Release LLVM To The Public +================================= + +.. contents:: + :local: + :depth: 1 + +Introduction +============ + +This document contains information about successfully releasing LLVM --- +including subprojects: e.g., ``clang`` and ``dragonegg`` --- to the public. It +is the Release Manager's responsibility to ensure that a high quality build of +LLVM is released. + +.. _timeline: + +Release Timeline +================ + +LLVM is released on a time based schedule --- roughly every 6 months. We do +not normally have dot releases because of the nature of LLVM's incremental +development philosophy. That said, the only thing preventing dot releases for +critical bug fixes from happening is a lack of resources --- testers, +machines, time, etc. And, because of the high quality we desire for LLVM +releases, we cannot allow for a truncated form of release qualification. + +The release process is roughly as follows: + +* Set code freeze and branch creation date for 6 months after last code freeze + date. Announce release schedule to the LLVM community and update the website. + +* Create release branch and begin release process. + +* Send out release candidate sources for first round of testing. Testing lasts + 7-10 days. During the first round of testing, any regressions found should be + fixed. Patches are merged from mainline into the release branch. Also, all + features need to be completed during this time. Any features not completed at + the end of the first round of testing will be removed or disabled for the + release. + +* Generate and send out the second release candidate sources. Only *critical* + bugs found during this testing phase will be fixed. Any bugs introduced by + merged patches will be fixed. 
If so a third round of testing is needed. + +* The release notes are updated. + +* Finally, release! + +Release Process +=============== + +.. contents:: + :local: + +Release Administrative Tasks +---------------------------- + +This section describes a few administrative tasks that need to be done for the +release process to begin. Specifically, it involves: + +* Creating the release branch, + +* Setting version numbers, and + +* Tagging release candidates for the release team to begin testing. + +Create Release Branch +^^^^^^^^^^^^^^^^^^^^^ + +Branch the Subversion trunk using the following procedure: + +#. Remind developers that the release branching is imminent and to refrain from + committing patches that might break the build. E.g., new features, large + patches for works in progress, an overhaul of the type system, an exciting + new TableGen feature, etc. + +#. Verify that the current Subversion trunk is in decent shape by + examining nightly tester and buildbot results. + +#. Create the release branch for ``llvm``, ``clang``, the ``test-suite``, and + ``dragonegg`` from the last known good revision. The branch's name is + ``release_XY``, where ``X`` is the major and ``Y`` the minor release + numbers. The branches should be created using the following commands: + + :: + + $ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \ + https://llvm.org/svn/llvm-project/llvm/branches/release_XY + + $ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \ + https://llvm.org/svn/llvm-project/cfe/branches/release_XY + + $ svn copy https://llvm.org/svn/llvm-project/dragonegg/trunk \ + https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY + + $ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \ + https://llvm.org/svn/llvm-project/test-suite/branches/release_XY + +#. Advise developers that they may now check their patches into the Subversion + tree again. + +#. 
The Release Manager should switch to the release branch, because all changes + to the release will now be done in the branch. The easiest way to do this is + to grab a working copy using the following commands: + + :: + + $ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XY llvm-X.Y + + $ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XY clang-X.Y + + $ svn co https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY dragonegg-X.Y + + $ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XY test-suite-X.Y + +Update LLVM Version +^^^^^^^^^^^^^^^^^^^ + +After creating the LLVM release branch, update the release branches' +``autoconf`` and ``configure.ac`` versions from '``X.Ysvn``' to '``X.Y``'. +Update it on mainline as well to be the next version ('``X.Y+1svn``'). +Regenerate the configure scripts for both ``llvm`` and the ``test-suite``. + +In addition, the version numbers of all the Bugzilla components must be updated +for the next release. + +Build the LLVM Release Candidates +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Create release candidates for ``llvm``, ``clang``, ``dragonegg``, and the LLVM +``test-suite`` by tagging the branch with the respective release candidate +number. 
For instance, to create **Release Candidate 1** you would issue the +following commands: + +:: + + $ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY + $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \ + https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1 + + $ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY + $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \ + https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1 + + $ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY + $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \ + https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1 + + $ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY + $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \ + https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1 + +Similarly, **Release Candidate 2** would be named ``RC2`` and so on. This keeps +a permanent copy of the release candidate around for people to export and build +as they wish. The final released sources will be tagged in the ``RELEASE_XY`` +directory as ``Final`` (c.f. :ref:`tag`). + +The Release Manager may supply pre-packaged source tarballs for users. 
This can +be done with the following commands: + +:: + + $ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1 llvm-X.Yrc1 + $ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1 clang-X.Yrc1 + $ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1 dragonegg-X.Yrc1 + $ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1 llvm-test-X.Yrc1 + + $ tar -cvf - llvm-X.Yrc1 | gzip > llvm-X.Yrc1.src.tar.gz + $ tar -cvf - clang-X.Yrc1 | gzip > clang-X.Yrc1.src.tar.gz + $ tar -cvf - dragonegg-X.Yrc1 | gzip > dragonegg-X.Yrc1.src.tar.gz + $ tar -cvf - llvm-test-X.Yrc1 | gzip > llvm-test-X.Yrc1.src.tar.gz + +Building the Release +-------------------- + +The builds of ``llvm``, ``clang``, and ``dragonegg`` *must* be free of +errors and warnings in Debug, Release+Asserts, and Release builds. If all +builds are clean, then the release passes Build Qualification. + +The ``make`` options for building the different modes: + ++-----------------+---------------------------------------------+ +| Mode | Options | ++=================+=============================================+ +| Debug | ``ENABLE_OPTIMIZED=0`` | ++-----------------+---------------------------------------------+ +| Release+Asserts | ``ENABLE_OPTIMIZED=1`` | ++-----------------+---------------------------------------------+ +| Release | ``ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1`` | ++-----------------+---------------------------------------------+ + +Build LLVM +^^^^^^^^^^ + +Build ``Debug``, ``Release+Asserts``, and ``Release`` versions +of ``llvm`` on all supported platforms. Directions to build ``llvm`` +are :doc:`here `. + +Build Clang Binary Distribution +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Creating the ``clang`` binary distribution (Debug/Release+Asserts/Release) +requires performing the following steps for each supported platform: + +#. Build clang according to the directions `here + `__. + +#. 
Build both a Debug and Release version of clang. The binary will be the + Release build. + +#. Package ``clang`` (details to follow). + +Target Specific Build Details +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The table below specifies which compilers are used for each Arch/OS combination +when qualifying the build of ``llvm``, ``clang``, and ``dragonegg``. + ++--------------+---------------+----------------------+ +| Architecture | OS | compiler | ++==============+===============+======================+ +| x86-32 | Mac OS 10.5 | gcc 4.0.1 | ++--------------+---------------+----------------------+ +| x86-32 | Linux | gcc 4.2.X, gcc 4.3.X | ++--------------+---------------+----------------------+ +| x86-32 | FreeBSD | gcc 4.2.X | ++--------------+---------------+----------------------+ +| x86-32 | mingw | gcc 3.4.5 | ++--------------+---------------+----------------------+ +| x86-64 | Mac OS 10.5 | gcc 4.0.1 | ++--------------+---------------+----------------------+ +| x86-64 | Linux | gcc 4.2.X, gcc 4.3.X | ++--------------+---------------+----------------------+ +| x86-64 | FreeBSD | gcc 4.2.X | ++--------------+---------------+----------------------+ + +Release Qualification Criteria +------------------------------ + +A release is qualified when it has no regressions from the previous release (or +baseline). Regressions are related to correctness first and performance second. +(We may tolerate some minor performance regressions if they are deemed +necessary for the general quality of the compiler.) + +**Regressions are new failures in the set of tests that are used to qualify +each product and only include things on the list. Every release will have +some bugs in it. It is the reality of developing a complex piece of +software. We need a very concrete and definitive release criteria that +ensures we have monotonically improving quality on some metric. The metric we +use is described below. 
This doesn't mean that we don't care about other +criteria, but these are the criteria which we found to be most important and +which must be satisfied before a release can go out.** + +Qualify LLVM +^^^^^^^^^^^^ + +LLVM is qualified when it has a clean test run without a front-end. And it has +no regressions when using either ``clang`` or ``dragonegg`` with the +``test-suite`` from the previous release. + +Qualify Clang +^^^^^^^^^^^^^ + +``Clang`` is qualified when front-end specific tests in the ``llvm`` regression +test suite all pass, clang's own test suite passes cleanly, and there are no +regressions in the ``test-suite``. + +Specific Target Qualification Details +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ++--------------+-------------+----------------+-----------------------------+ +| Architecture | OS | clang baseline | tests | ++==============+=============+================+=============================+ +| x86-32 | Linux | last release | llvm regression tests, | +| | | | clang regression tests, | +| | | | test-suite (including spec) | ++--------------+-------------+----------------+-----------------------------+ +| x86-32 | FreeBSD | last release | llvm regression tests, | +| | | | clang regression tests, | +| | | | test-suite | ++--------------+-------------+----------------+-----------------------------+ +| x86-32 | mingw | none | QT | ++--------------+-------------+----------------+-----------------------------+ +| x86-64 | Mac OS 10.X | last release | llvm regression tests, | +| | | | clang regression tests, | +| | | | test-suite (including spec) | ++--------------+-------------+----------------+-----------------------------+ +| x86-64 | Linux | last release | llvm regression tests, | +| | | | clang regression tests, | +| | | | test-suite (including spec) | ++--------------+-------------+----------------+-----------------------------+ +| x86-64 | FreeBSD | last release | llvm regression tests, | +| | | | clang regression tests, | +| | | | test-suite | 
++--------------+-------------+----------------+-----------------------------+ + +Community Testing +----------------- + +Once all testing has been completed and appropriate bugs filed, the release +candidate tarballs are put on the website and the LLVM community is notified. +Ask that all LLVM developers test the release in 2 ways: + +#. Download ``llvm-X.Y``, ``llvm-test-X.Y``, and the appropriate ``clang`` + binary. Build LLVM. Run ``make check`` and the full LLVM test suite (``make + TEST=nightly report``). + +#. Download ``llvm-X.Y``, ``llvm-test-X.Y``, and the ``clang`` sources. Compile + everything. Run ``make check`` and the full LLVM test suite (``make + TEST=nightly report``). + +Ask LLVM developers to submit the test suite report and ``make check`` results +to the list. Verify that there are no regressions from the previous release. +The results are not used to qualify a release, but to spot other potential +problems. For unsupported targets, verify that ``make check`` is at least +clean. + +During the first round of testing, all regressions must be fixed before the +second release candidate is tagged. + +If this is the second round of testing, the testing is only to ensure that bug +fixes previously merged in have not created new major problems. *This is not +the time to solve additional and unrelated bugs!* If no patches are merged in, +the release is determined to be ready and the release manager may move onto the +next stage. + +Release Patch Rules +------------------- + +Below are the rules regarding patching the release branch: + +#. Patches applied to the release branch may only be applied by the release + manager. + +#. During the first round of testing, patches that fix regressions or that are + small and relatively risk free (verified by the appropriate code owner) are + applied to the branch. Code owners are asked to be very conservative in + approving patches for the branch. 
We reserve the right to reject any patch + that does not fix a regression as previously defined. + +#. During the remaining rounds of testing, only patches that fix critical + regressions may be applied. + +Release Final Tasks +------------------- + +The final stages of the release process involve tagging the "final" release +branch, updating documentation that refers to the release, and updating the +demo page. + +Update Documentation +^^^^^^^^^^^^^^^^^^^^ + +Review the documentation and ensure that it is up to date. The "Release Notes" +must be updated to reflect new features, bug fixes, new known issues, and +changes in the list of supported platforms. The "Getting Started Guide" should +be updated to reflect the new release version number tag available from +Subversion and changes in basic system requirements. Merge both changes from +mainline into the release branch. + +.. _tag: + +Tag the LLVM Final Release +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Tag the final release sources using the following procedure: + +:: + + $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \ + https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/Final + + $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \ + https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/Final + + $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \ + https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/Final + + $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \ + https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/Final + +Update the LLVM Demo Page +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The LLVM demo page must be updated to use the new release. This consists of +using the new ``clang`` binary and building LLVM. + +Update the LLVM Website +^^^^^^^^^^^^^^^^^^^^^^^ + +The website must be updated before the release announcement is sent out. Here +is what to do: + +#. 
Check out the ``www`` module from Subversion. + +#. 
Create a new subdirectory ``X.Y`` in the releases directory. + +#. Commit the ``llvm``, ``test-suite``, ``clang`` source, ``clang`` binaries, + ``dragonegg`` source, and ``dragonegg`` binaries in this new directory. + +#. Copy and commit the ``llvm/docs`` and ``LICENSE.txt`` files into this new + directory. The docs should be built with ``BUILD_FOR_WEBSITE=1``. + +#. Commit the ``index.html`` to the ``release/X.Y`` directory to redirect (use + from previous release). + +#. Update the ``releases/download.html`` file with the new release. + +#. Update the ``releases/index.html`` with the new release and link to release + documentation. + +#. Finally, update the main page (``index.html`` and sidebar) to point to the + new release and release announcement. Make sure this all gets committed back + into Subversion. + +Announce the Release +^^^^^^^^^^^^^^^^^^^^ + +Have Chris send out the release announcement when everything is finished. + diff --git a/docs/HowToSetUpLLVMStyleRTTI.rst b/docs/HowToSetUpLLVMStyleRTTI.rst index aa1ad84afee3..e0f865a141c7 100644 --- a/docs/HowToSetUpLLVMStyleRTTI.rst +++ b/docs/HowToSetUpLLVMStyleRTTI.rst @@ -1,11 +1,7 @@ -.. _how-to-set-up-llvm-style-rtti: - ====================================================== How to set up LLVM-style RTTI for your class hierarchy ====================================================== -.. sectionauthor:: Sean Silva - .. contents:: Background @@ -299,6 +295,78 @@ ordering right:: | OtherSpecialSquare | Circle +A Bug to be Aware Of +-------------------- + +The example just given opens the door to bugs where the ``classof``\s are +not updated to match the ``Kind`` enum when adding (or removing) classes to +(from) the hierarchy. + +Continuing the example above, suppose we add a ``SomewhatSpecialSquare`` as +a subclass of ``Square``, and update the ``ShapeKind`` enum like so: + +.. 
code-block:: c++ + + enum ShapeKind { + SK_Square, + SK_SpecialSquare, + SK_OtherSpecialSquare, + + SK_SomewhatSpecialSquare, + SK_Circle + } + +Now, suppose that we forget to update ``Square::classof()``, so it still +looks like: + +.. code-block:: c++ + + static bool classof(const Shape *S) { + // BUG: Returns false when S->getKind() == SK_SomewhatSpecialSquare, + // even though SomewhatSpecialSquare "is a" Square. + return S->getKind() >= SK_Square && + S->getKind() <= SK_OtherSpecialSquare; + } + +As the comment indicates, this code contains a bug. A straightforward and +non-clever way to avoid this is to introduce an explicit ``SK_LastSquare`` +entry in the enum when adding the first subclass(es). For example, we could +rewrite the example at the beginning of `Concrete Bases and Deeper +Hierarchies`_ as: + +.. code-block:: c++ + + enum ShapeKind { + SK_Square, + + SK_SpecialSquare, + + SK_OtherSpecialSquare, + + SK_LastSquare, + SK_Circle + } + ... + // Square::classof() + - static bool classof(const Shape *S) { + - return S->getKind() == SK_Square; + - } + + static bool classof(const Shape *S) { + + return S->getKind() >= SK_Square && + + S->getKind() <= SK_LastSquare; + + } + +Then, adding new subclasses is easy: + +.. code-block:: c++ + + enum ShapeKind { + SK_Square, + SK_SpecialSquare, + SK_OtherSpecialSquare, + + SK_SomewhatSpecialSquare, + SK_LastSquare, + SK_Circle + } + +Notice that ``Square::classof`` does not need to be changed. + .. _classof-contract: The Contract of ``classof`` diff --git a/docs/HowToSubmitABug.rst b/docs/HowToSubmitABug.rst index ff2d649ce33c..45be2826b301 100644 --- a/docs/HowToSubmitABug.rst +++ b/docs/HowToSubmitABug.rst @@ -1,11 +1,7 @@ -.. _how-to-submit-a-bug-report: - ================================ How to submit an LLVM bug report ================================ -.. sectionauthor:: Chris Lattner and Misha Brukman - Introduction - Got bugs? 
======================== diff --git a/docs/HowToUseAttributes.rst b/docs/HowToUseAttributes.rst new file mode 100644 index 000000000000..66c44c01f631 --- /dev/null +++ b/docs/HowToUseAttributes.rst @@ -0,0 +1,81 @@ +===================== +How To Use Attributes +===================== + +.. contents:: + :local: + +Introduction +============ + +Attributes in LLVM have changed in some fundamental ways. It was necessary to +do this to support expanding the attributes to encompass more than a handful of +attributes --- e.g. command line options. The old way of handling attributes +consisted of representing them as a bit mask of values. This bit mask was +stored in a "list" structure that was reference counted. The advantage of this +was that attributes could be manipulated with 'or's and 'and's. The +disadvantage of this was that there was limited room for expansion, and +virtually no support for attribute-value pairs other than alignment. + +In the new scheme, an ``Attribute`` object represents a single attribute that's +uniqued. You use the ``Attribute::get`` methods to create a new ``Attribute`` +object. An attribute can be a single "enum" value (the enum being the +``Attribute::AttrKind`` enum), a string representing a target-dependent +attribute, or an attribute-value pair. Some examples: + +* Target-independent: ``noinline``, ``zext`` +* Target-dependent: ``"no-sse"``, ``"thumb2"`` +* Attribute-value pair: ``"cpu" = "cortex-a8"``, ``align = 4`` + +Note: for an attribute value pair, we expect a target-dependent attribute to +have a string for the value. + +``Attribute`` +============= +An ``Attribute`` object is designed to be passed around by value. + +Because attributes are no longer represented as a bit mask, you will need to +convert any code which does treat them as a bit mask to use the new query +methods on the Attribute class. + +``AttributeSet`` +================ + +The ``AttributeSet`` class replaces the old ``AttributeList`` class. 
The +``AttributeSet`` stores a collection of Attribute objects for each kind of +object that may have an attribute associated with it: the function as a +whole, the return type, or the function's parameters. A function's attributes +are at index ``AttributeSet::FunctionIndex``; the return type's attributes are +at index ``AttributeSet::ReturnIndex``; and the function's parameters' +attributes are at indices 1, ..., n (where 'n' is the number of parameters). +Most methods on the ``AttributeSet`` class take an index parameter. + +An ``AttributeSet`` is also a uniqued and immutable object. You create an +``AttributeSet`` through the ``AttributeSet::get`` methods. You can add and +remove attributes, which result in the creation of a new ``AttributeSet``. + +An ``AttributeSet`` object is designed to be passed around by value. + +Note: It is advised that you do *not* use the ``AttributeSet`` "introspection" +methods (e.g. ``Raw``, ``getRawPointer``, etc.). These methods break +encapsulation, and may be removed in a future release (i.e. LLVM 4.0). + +``AttrBuilder`` +=============== + +Lastly, we have a "builder" class to help create the ``AttributeSet`` object +without having to create several different intermediate uniqued +``AttributeSet`` objects. The ``AttrBuilder`` class allows you to add and +remove attributes at will. The attributes won't be uniqued until you call the +appropriate ``AttributeSet::get`` method. + +An ``AttrBuilder`` object is *not* designed to be passed around by value. It +should be passed by reference. + +Note: It is advised that you do *not* use the ``AttrBuilder::addRawValue()`` +method or the ``AttrBuilder(uint64_t Val)`` constructor. These are for +backwards compatibility and may be removed in a future release (i.e. LLVM 4.0). + +And that's basically it! A lot of functionality is hidden behind these classes, +but the interfaces are pretty straight forward. 
+ diff --git a/docs/HowToUseInstrMappings.rst b/docs/HowToUseInstrMappings.rst index b51e74e23c29..8a3e7c8d726d 100755 --- a/docs/HowToUseInstrMappings.rst +++ b/docs/HowToUseInstrMappings.rst @@ -1,11 +1,7 @@ -.. _how_to_use_instruction_mappings: - =============================== How To Use Instruction Mappings =============================== -.. sectionauthor:: Jyotsna Verma - .. contents:: :local: @@ -120,7 +116,7 @@ to include relevant information in its definition. For example, consider following to be the current definitions of ADD, ADD_pt (true) and ADD_pf (false) instructions: -.. code-block::llvm +.. code-block:: llvm def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b), "$dst = add($a, $b)", @@ -141,7 +137,7 @@ In this step, we modify these instructions to include the information required by the relationship model, getPredOpcode, so that they can be related. -.. code-block::llvm +.. code-block:: llvm def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b), "$dst = add($a, $b)", diff --git a/docs/LLVMBuild.html b/docs/LLVMBuild.html deleted file mode 100644 index 9e7f8c765775..000000000000 --- a/docs/LLVMBuild.html +++ /dev/null @@ -1,368 +0,0 @@ - - - - - LLVMBuild Documentation - - - - -

LLVMBuild Guide

- -
    -
  1. Introduction
  2. -
  3. Project Organization
  4. -
  5. Build Integration
  6. -
  7. Component Overview
  8. -
  9. Format Reference
  10. -
- - -

Introduction

- - -
-

This document describes the LLVMBuild organization and files which - we use to describe parts of the LLVM ecosystem. For description of specific - LLVMBuild related tools, please see the command guide.

- -

LLVM is designed to be a modular set of libraries which can be flexibly - mixed together in order to build a variety of tools, like compilers, JITs, - custom code generators, optimization passes, interpreters, and so on. Related - projects in the LLVM system like Clang and LLDB also tend to follow this - philosophy.

- -

In order to support this usage style, LLVM has a fairly strict structure as - to how the source code and various components are organized. The - LLVMBuild.txt files are the explicit specification of that structure, - and are used by the build systems and other tools in order to develop the LLVM - project.

-
- - -

Project Organization

- - - - -
-

The source code for LLVM projects using the LLVMBuild system (LLVM, Clang, - and LLDB) is organized into components, which define the separate - pieces of functionality that make up the project. These projects may consist - of many libraries, associated tools, build tools, or other utility tools (for - example, testing tools).

- -

For the most part, the project contents are organized around defining one - main component per each subdirectory. Each such directory contains - an LLVMBuild.txt which contains the component definitions.

- -

The component descriptions for the project as a whole are automatically - gathered by the LLVMBuild tools. The tools automatically traverse the source - directory structure to find all of the component description files. NOTE: For - performance/sanity reasons, we only traverse into subdirectories when the - parent itself contains an LLVMBuild.txt description file.

-
- - -

Build Integration

- - -
-

The LLVMBuild files themselves are just a declarative way to describe the - project structure. The actual building of the LLVM project is handled by - another build system (currently we support - both Makefiles - and CMake).

- -

The build system implementation will load the relevant contents of the - LLVMBuild files and use that to drive the actual project build. Typically, the - build system will only need to load this information at "configure" time, and - use it to generate native information. Build systems will also handle - automatically reconfiguring their information when the contents of - the LLVMBuild.txt files change.

- -

Developers generally are not expected to need to be aware of the details of - how the LLVMBuild system is integrated into their build. Ideally, LLVM - developers who are not working on the build system would only ever need to - modify the contents of the LLVMBuild.txt description files (although we - have not reached this goal yet).

- -

For more information on the utility tool we provide to help interfacing - with the build system, please see - the llvm-build - documentation.

-
- - -

Component Overview

- - -
-

As mentioned earlier, LLVM projects are organized into - logical components. Every component is typically grouped into its - own subdirectory. Generally, a component is organized around a coherent group - of sources which have some kind of clear API separation from other parts of - the code.

- -

LLVM primarily uses the following types of components:

-
    -
  • Libraries - Library components define a distinct API which can - be independently linked into LLVM client applications. Libraries typically - have private and public header files, and may specify a list of required - libraries that they build on top of.
  • - -
  • Build Tools - Build tools are applications which are designed - to be run as part of the build process (typically to generate other source - files). Currently, LLVM uses one main build tool - called TableGen to generate a - variety of source files.
  • - -
  • Tools - Command line applications which are built using the - LLVM component libraries. Most LLVM tools are small and are primarily - frontends to the library interfaces.
  • - - -
- -

Components are described using LLVMBuild.txt files in the - directories that define the component. See - the Format Reference section for information on - the exact format of these files.

-
- - -

LLVMBuild Format Reference

- - -
-

LLVMBuild files are written in a simple variant of the INI or configuration - file format (Wikipedia - entry). The format defines a list of sections each of which may contain - some number of properties. A simple example of the file format is below:

-
-
-; Comments start with a semi-colon.
-
-; Sections are declared using square brackets.
-[component_0]
-
-; Properties are declared using '=' and are contained in the previous section.
-;
-; We support simple string and boolean scalar values and list values, where
-; items are separated by spaces. There is no support for quoting, and so
-; property values may not contain spaces.
-property_name = property_value
-list_property_name = value_1 value_2 ... value_n
-boolean_property_name = 1 (or 0)
-
-
- -

LLVMBuild files are expected to define a strict set of sections and - properties. A typical component description file for a library - component would typically look like the following example:

-
-
-[component_0]
-type = Library
-name = Linker
-parent = Libraries
-required_libraries = Archive BitReader Core Support TransformUtils
-
-
- -

A full description of the exact sections and properties which are allowed - follows.

- -

Each file may define exactly one common component, named "common". The - common component may define the following properties:

-
    -
  • subdirectories [optional] -

    If given, a list of the names of the subdirectories from the current - subpath to search for additional LLVMBuild files.

  • -
- -

Each file may define multiple components. Each component is described by a - section whose name starts with "component". The remainder of the section name is - ignored, but each section name must be unique. Typically components are just - numbered in order for files with multiple components ("component_0", - "component_1", and so on).

- -

Section names not matching this format (or the "common" section) are - currently unused and are disallowed.

- -

Every component is defined by the properties in the section. The exact list - of properties that are allowed depends on the component - type. Components may not define any properties other than those - expected by the component type.

- -

Every component must define the following properties:

-
    -
  • type [required] -

    The type of the component. Supported component types are - detailed below. Most components will define additional properties which - may be required or optional.

  • - -
  • name [required] -

    The name of the component. Names are required to be unique - across the entire project.

  • - -
  • parent [required] -

    The name of the logical parent of the component. Components are - organized into a logical tree to make it easier to navigate and organize - groups of components. The parents have no semantics as far as the project - build is concerned, however. Typically, the parent will be the main - component of the parent directory.

    - - - -

    Components may reference the root pseudo component using '$ROOT' to - indicate they should logically be grouped at the top-level.

    -
  • -
- -

Components may define the following properties:

-
    -
  • dependencies [optional] -

    If specified, a list of names of components which must be built - prior to this one. This should only be exactly those components which - produce some tool or source code required for building the - component.

    - -

    NOTE: Group and LibraryGroup components have no semantics for - the actual build, and are not allowed to specify dependencies.

  • -
- -

The following section lists the available component types, as well as the - properties which are associated with that component.

- -
    -
  • type = Group -

    Group components exist purely to allow additional arbitrary structuring - of the logical components tree. For example, one might define a - "Libraries" group to hold all of the root library components.

    - -

    Group components have no additional properties.

    -
  • - -
  • type = Library -

    Library components define an individual library which should be built - from the source code in the component directory.

    - -

    Components with this type use the following properties:

    -
      -
    • library_name [optional] -

      If given, the name to use for the actual library file on disk. If - not given, the name is derived from the component name - itself.

    • - -
    • required_libraries [optional] -

      If given, a list of the names of Library or LibraryGroup components - which must also be linked in whenever this library is used. That is, - the link time dependencies for this component. When tools are built, - the build system will include the transitive closure of - all required_libraries for the components the tool needs.

    • - -
    • add_to_library_groups [optional] -

      If given, a list of the names of LibraryGroup components which this - component is also part of. This allows nesting groups of - components. For example, the X86 target might define a library - group for all of the X86 components. That library group might - then be included in the all-targets library group.

    • - -
    • installed [optional] [boolean] -

      Whether this library is installed. Libraries that are not installed - are only reported by llvm-config when it is run as part of a - development directory.

    • -
    -
  • - -
  • type = LibraryGroup -

    LibraryGroup components are a mechanism to allow easy definition of - useful sets of related components. In particular, we use them to easily - specify things like "all targets", or "all assembly printers".

    - -

    Components with this type use the following properties:

    -
      -
    • required_libraries [optional] -

      See the Library type for a description of this property.

    • - -
    • add_to_library_groups [optional] -

      See the Library type for a description of this property.

    • -
    -
  • - -
  • type = TargetGroup -

    TargetGroup components are an extension of LibraryGroups, specifically - for defining LLVM targets (which are handled specially in a few - places).

    - -

    The name of the component should always be the name of the target.

    - -

    Components with this type use the LibraryGroup properties in addition - to:

    -
      -
    • has_asmparser [optional] [boolean] -

      Whether this target defines an assembly parser.

    • -
    • has_asmprinter [optional] [boolean] -

      Whether this target defines an assembly printer.

    • -
    • has_disassembler [optional] [boolean] -

      Whether this target defines a disassembler.

    • -
    • has_jit [optional] [boolean] -

      Whether this target supports JIT compilation.

    • -
    -
  • - -
  • type = Tool -

    Tool components define standalone command line tools which should be - built from the source code in the component directory and linked.

    - -

    Components with this type use the following properties:

    -
      -
    • required_libraries [optional] - -

      If given, a list of the names of Library or LibraryGroup components - which this tool is required to be linked with. NOTE: The values - should be the component names, which may not always match up with the - actual library names on disk.

      - -

      Build systems are expected to properly include all of the libraries - required by the linked components (i.e., the transitive closure - of required_libraries).

      - -

      Build systems are also expected to understand that those library - components must be built prior to linking -- they do not also need to - be listed under dependencies.

    • -
    -
  • - -
  • type = BuildTool -

    BuildTool components are like Tool components, except that the tool is - supposed to be built for the platform where the build is running (instead - of that platform being targeted). Build systems are expected to handle - the fact that required libraries may need to be built for multiple - platforms in order to be able to link this tool.

    - -

    BuildTool components currently use the exact same properties as Tool - components, the type distinction is only used to differentiate what the - tool is built for.

    -
  • -
-
- - -
-
- Valid CSS - Valid HTML 4.01 - - The LLVM Compiler Infrastructure
- Last modified: $Date$ -
- - diff --git a/docs/LLVMBuild.rst b/docs/LLVMBuild.rst new file mode 100644 index 000000000000..d9215dd8eb52 --- /dev/null +++ b/docs/LLVMBuild.rst @@ -0,0 +1,325 @@ +=============== +LLVMBuild Guide +=============== + +.. contents:: + :local: + +Introduction +============ + +This document describes the ``LLVMBuild`` organization and files which +we use to describe parts of the LLVM ecosystem. For description of +specific LLVMBuild related tools, please see the command guide. + +LLVM is designed to be a modular set of libraries which can be flexibly +mixed together in order to build a variety of tools, like compilers, +JITs, custom code generators, optimization passes, interpreters, and so +on. Related projects in the LLVM system like Clang and LLDB also tend to +follow this philosophy. + +In order to support this usage style, LLVM has a fairly strict structure +as to how the source code and various components are organized. The +``LLVMBuild.txt`` files are the explicit specification of that +structure, and are used by the build systems and other tools in order to +develop the LLVM project. + +Project Organization +==================== + +The source code for LLVM projects using the LLVMBuild system (LLVM, +Clang, and LLDB) is organized into *components*, which define the +separate pieces of functionality that make up the project. These +projects may consist of many libraries, associated tools, build tools, +or other utility tools (for example, testing tools). + +For the most part, the project contents are organized around defining +one main component per each subdirectory. Each such directory contains +an ``LLVMBuild.txt`` which contains the component definitions. + +The component descriptions for the project as a whole are automatically +gathered by the LLVMBuild tools. The tools automatically traverse the +source directory structure to find all of the component description +files. 
NOTE: For performance/sanity reasons, we only traverse into +subdirectories when the parent itself contains an ``LLVMBuild.txt`` +description file. + +Build Integration +================= + +The LLVMBuild files themselves are just a declarative way to describe +the project structure. The actual building of the LLVM project is +handled by another build system (currently we support both +:doc:`Makefiles <MakefileGuide>` and :doc:`CMake <CMake>`). + +The build system implementation will load the relevant contents of the +LLVMBuild files and use that to drive the actual project build. +Typically, the build system will only need to load this information at +"configure" time, and use it to generate native information. Build +systems will also handle automatically reconfiguring their information +when the contents of the ``LLVMBuild.txt`` files change. + +Developers generally are not expected to need to be aware of the details +of how the LLVMBuild system is integrated into their build. Ideally, +LLVM developers who are not working on the build system would only ever +need to modify the contents of the ``LLVMBuild.txt`` description files +(although we have not reached this goal yet). + +For more information on the utility tool we provide to help interfacing +with the build system, please see the :doc:`llvm-build +<CommandGuide/llvm-build>` documentation. + +Component Overview +================== + +As mentioned earlier, LLVM projects are organized into logical +*components*. Every component is typically grouped into its own +subdirectory. Generally, a component is organized around a coherent +group of sources which have some kind of clear API separation from other +parts of the code. + +LLVM primarily uses the following types of components: + +- *Libraries* - Library components define a distinct API which can be + independently linked into LLVM client applications. 
Libraries typically + have private and public header files, and may specify a list of required + libraries that they build on top of.
+- *Build Tools* - Build tools are applications which are designed to be run + as part of the build process (typically to generate other source files). + Currently, LLVM uses one main build tool called :doc:`TableGen + <TableGenFundamentals>` to generate a variety of source files. +- *Tools* - Command line applications which are built using the LLVM + component libraries. Most LLVM tools are small and are primarily + frontends to the library interfaces. + +Components are described using ``LLVMBuild.txt`` files in the directories +that define the component. See the `LLVMBuild Format Reference`_ section +for information on the exact format of these files. + +LLVMBuild Format Reference +========================== + +LLVMBuild files are written in a simple variant of the INI or configuration +file format (`Wikipedia entry`_). The format defines a list of sections +each of which may contain some number of properties. A simple example of +the file format is below: + +.. _Wikipedia entry: http://en.wikipedia.org/wiki/INI_file + +.. code-block:: ini + + ; Comments start with a semi-colon. + + ; Sections are declared using square brackets. + [component_0] + + ; Properties are declared using '=' and are contained in the previous section. + ; + ; We support simple string and boolean scalar values and list values, where + ; items are separated by spaces. There is no support for quoting, and so + ; property values may not contain spaces. + property_name = property_value + list_property_name = value_1 value_2 ... value_n + boolean_property_name = 1 (or 0) + +LLVMBuild files are expected to define a strict set of sections and +properties. A typical component description file for a library +component would typically look like the following example: + +.. code-block:: ini + + [component_0] + type = Library + name = Linker + parent = Libraries + required_libraries = Archive BitReader Core Support TransformUtils + +A full description of the exact sections and properties which are +allowed follows.
+ +Each file may define exactly one common component, named ``common``. The +common component may define the following properties: + +- ``subdirectories`` **[optional]** + + If given, a list of the names of the subdirectories from the current + subpath to search for additional LLVMBuild files. + +Each file may define multiple components. Each component is described by a +section whose name starts with ``component``. The remainder of the section +name is ignored, but each section name must be unique. Typically components +are just numbered in order for files with multiple components +(``component_0``, ``component_1``, and so on). + +.. warning:: + + Section names not matching this format (or the ``common`` section) are + currently unused and are disallowed. + +Every component is defined by the properties in the section. The exact +list of properties that are allowed depends on the component type. +Components **may not** define any properties other than those expected +by the component type. + +Every component must define the following properties: + +- ``type`` **[required]** + + The type of the component. Supported component types are detailed + below. Most components will define additional properties which may be + required or optional. + +- ``name`` **[required]** + + The name of the component. Names are required to be unique across the + entire project. + +- ``parent`` **[required]** + + The name of the logical parent of the component. Components are + organized into a logical tree to make it easier to navigate and + organize groups of components. The parents have no semantics as far + as the project build is concerned, however. Typically, the parent + will be the main component of the parent directory. + + Components may reference the root pseudo component using ``$ROOT`` to + indicate they should logically be grouped at the top-level.
+ +Components may define the following properties: + +- ``dependencies`` **[optional]** + + If specified, a list of names of components which *must* be built + prior to this one. This should only be exactly those components which + produce some tool or source code required for building the component. + + .. note:: + + ``Group`` and ``LibraryGroup`` components have no semantics for the + actual build, and are not allowed to specify dependencies. + +The following section lists the available component types, as well as +the properties which are associated with that component. + +- ``type = Group`` + + Group components exist purely to allow additional arbitrary structuring + of the logical components tree. For example, one might define a + ``Libraries`` group to hold all of the root library components. + + ``Group`` components have no additional properties. + +- ``type = Library`` + + Library components define an individual library which should be built + from the source code in the component directory. + + Components with this type use the following properties: + + - ``library_name`` **[optional]** + + If given, the name to use for the actual library file on disk. If + not given, the name is derived from the component name itself. + + - ``required_libraries`` **[optional]** + + If given, a list of the names of ``Library`` or ``LibraryGroup`` + components which must also be linked in whenever this library is + used. That is, the link time dependencies for this component. When + tools are built, the build system will include the transitive closure + of all ``required_libraries`` for the components the tool needs. + + - ``add_to_library_groups`` **[optional]** + + If given, a list of the names of ``LibraryGroup`` components which + this component is also part of. This allows nesting groups of + components. For example, the ``X86`` target might define a library + group for all of the ``X86`` components.
That library group might + then be included in the ``all-targets`` library group. + + - ``installed`` **[optional]** **[boolean]** + + Whether this library is installed. Libraries that are not installed + are only reported by ``llvm-config`` when it is run as part of a + development directory. + +- ``type = LibraryGroup`` + + ``LibraryGroup`` components are a mechanism to allow easy definition of + useful sets of related components. In particular, we use them to easily + specify things like "all targets", or "all assembly printers". + + Components with this type use the following properties: + + - ``required_libraries`` **[optional]** + + See the ``Library`` type for a description of this property. + + - ``add_to_library_groups`` **[optional]** + + See the ``Library`` type for a description of this property. + +- ``type = TargetGroup`` + + ``TargetGroup`` components are an extension of ``LibraryGroup``\s, + specifically for defining LLVM targets (which are handled specially in a + few places). + + The name of the component should always be the name of the target. + + Components with this type use the ``LibraryGroup`` properties in + addition to: + + - ``has_asmparser`` **[optional]** **[boolean]** + + Whether this target defines an assembly parser. + + - ``has_asmprinter`` **[optional]** **[boolean]** + + Whether this target defines an assembly printer. + + - ``has_disassembler`` **[optional]** **[boolean]** + + Whether this target defines a disassembler. + + - ``has_jit`` **[optional]** **[boolean]** + + Whether this target supports JIT compilation. + +- ``type = Tool`` + + ``Tool`` components define standalone command line tools which should be + built from the source code in the component directory and linked. + + Components with this type use the following properties: + + - ``required_libraries`` **[optional]** + + If given, a list of the names of ``Library`` or ``LibraryGroup`` + components which this tool is required to be linked with. + + .. 
note:: + + The values should be the component names, which may not always + match up with the actual library names on disk. + + Build systems are expected to properly include all of the libraries + required by the linked components (i.e., the transitive closure of + ``required_libraries``). + + Build systems are also expected to understand that those library + components must be built prior to linking -- they do not also need + to be listed under ``dependencies``. + +- ``type = BuildTool`` + + ``BuildTool`` components are like ``Tool`` components, except that the + tool is supposed to be built for the platform where the build is running + (instead of that platform being targeted). Build systems are expected + to handle the fact that required libraries may need to be built for + multiple platforms in order to be able to link this tool. + + ``BuildTool`` components currently use the exact same properties as + ``Tool`` components, the type distinction is only used to differentiate + what the tool is built for. + diff --git a/docs/LangRef.html b/docs/LangRef.html deleted file mode 100644 index 13daa65ca358..000000000000 --- a/docs/LangRef.html +++ /dev/null @@ -1,8776 +0,0 @@ - - - - LLVM Assembly Language Reference Manual - - - - - - - - -

LLVM Language Reference Manual

-
    -
  1. Abstract
  2. -
  3. Introduction
  4. -
  5. Identifiers
  6. -
  7. High Level Structure -
      -
    1. Module Structure
    2. -
    3. Linkage Types -
        -
      1. 'private' Linkage
      2. -
      3. 'linker_private' Linkage
      4. -
      5. 'linker_private_weak' Linkage
      6. -
      7. 'internal' Linkage
      8. -
      9. 'available_externally' Linkage
      10. -
      11. 'linkonce' Linkage
      12. -
      13. 'common' Linkage
      14. -
      15. 'weak' Linkage
      16. -
      17. 'appending' Linkage
      18. -
      19. 'extern_weak' Linkage
      20. -
      21. 'linkonce_odr' Linkage
      22. -
      23. 'linkonce_odr_auto_hide' Linkage
      24. -
      25. 'weak_odr' Linkage
      26. -
      27. 'external' Linkage
      28. -
      29. 'dllimport' Linkage
      30. -
      31. 'dllexport' Linkage
      32. -
      -
    4. -
    5. Calling Conventions
    6. -
    7. Named Types
    8. -
    9. Global Variables
    10. -
    11. Functions
    12. -
    13. Aliases
    14. -
    15. Named Metadata
    16. -
    17. Parameter Attributes
    18. -
    19. Function Attributes
    20. -
    21. Garbage Collector Names
    22. -
    23. Module-Level Inline Assembly
    24. -
    25. Data Layout
    26. -
    27. Pointer Aliasing Rules
    28. -
    29. Volatile Memory Accesses
    30. -
    31. Memory Model for Concurrent Operations
    32. -
    33. Atomic Memory Ordering Constraints
    34. -
    -
  8. -
  9. Type System -
      -
    1. Type Classifications
    2. -
    3. Primitive Types -
        -
      1. Integer Type
      2. -
      3. Floating Point Types
      4. -
      5. X86mmx Type
      6. -
      7. Void Type
      8. -
      9. Label Type
      10. -
      11. Metadata Type
      12. -
      -
    4. -
    5. Derived Types -
        -
      1. Aggregate Types -
          -
        1. Array Type
        2. -
        3. Structure Type
        4. -
        5. Opaque Structure Types
        6. -
        7. Vector Type
        8. -
        -
      2. -
      3. Function Type
      4. -
      5. Pointer Type
      6. -
      -
    6. -
    -
  10. -
  11. Constants -
      -
    1. Simple Constants
    2. -
    3. Complex Constants
    4. -
    5. Global Variable and Function Addresses
    6. -
    7. Undefined Values
    8. -
    9. Poison Values
    10. -
    11. Addresses of Basic Blocks
    12. -
    13. Constant Expressions
    14. -
    -
  12. -
  13. Other Values -
      -
    1. Inline Assembler Expressions
    2. -
    3. Metadata Nodes and Metadata Strings -
        -
      1. 'tbaa' Metadata
      2. -
      3. 'tbaa.struct' Metadata
      4. -
      5. 'fpmath' Metadata
      6. -
      7. 'range' Metadata
      8. -
      -
    4. -
    -
  14. -
  15. Module Flags Metadata -
      -
    1. Objective-C Garbage Collection Module Flags Metadata
    2. -
    -
  16. -
  17. Intrinsic Global Variables -
      -
    1. The 'llvm.used' Global Variable
    2. -
    3. The 'llvm.compiler.used' - Global Variable
    4. -
    5. The 'llvm.global_ctors' - Global Variable
    6. -
    7. The 'llvm.global_dtors' - Global Variable
    8. -
    -
  18. -
  19. Instruction Reference -
      -
    1. Terminator Instructions -
        -
      1. 'ret' Instruction
      2. -
      3. 'br' Instruction
      4. -
      5. 'switch' Instruction
      6. -
      7. 'indirectbr' Instruction
      8. -
      9. 'invoke' Instruction
      10. -
      11. 'resume' Instruction
      12. -
      13. 'unreachable' Instruction
      14. -
      -
    2. -
    3. Binary Operations -
        -
      1. 'add' Instruction
      2. -
      3. 'fadd' Instruction
      4. -
      5. 'sub' Instruction
      6. -
      7. 'fsub' Instruction
      8. -
      9. 'mul' Instruction
      10. -
      11. 'fmul' Instruction
      12. -
      13. 'udiv' Instruction
      14. -
      15. 'sdiv' Instruction
      16. -
      17. 'fdiv' Instruction
      18. -
      19. 'urem' Instruction
      20. -
      21. 'srem' Instruction
      22. -
      23. 'frem' Instruction
      24. -
      -
    4. -
    5. Bitwise Binary Operations -
        -
      1. 'shl' Instruction
      2. -
      3. 'lshr' Instruction
      4. -
      5. 'ashr' Instruction
      6. -
      7. 'and' Instruction
      8. -
      9. 'or' Instruction
      10. -
      11. 'xor' Instruction
      12. -
      -
    6. -
    7. Vector Operations -
        -
      1. 'extractelement' Instruction
      2. -
      3. 'insertelement' Instruction
      4. -
      5. 'shufflevector' Instruction
      6. -
      -
    8. -
    9. Aggregate Operations -
        -
      1. 'extractvalue' Instruction
      2. -
      3. 'insertvalue' Instruction
      4. -
      -
    10. -
    11. Memory Access and Addressing Operations -
        -
      1. 'alloca' Instruction
      2. -
      3. 'load' Instruction
      4. -
      5. 'store' Instruction
      6. -
      7. 'fence' Instruction
      8. -
      9. 'cmpxchg' Instruction
      10. -
      11. 'atomicrmw' Instruction
      12. -
      13. 'getelementptr' Instruction
      14. -
      -
    12. -
    13. Conversion Operations -
        -
      1. 'trunc .. to' Instruction
      2. -
      3. 'zext .. to' Instruction
      4. -
      5. 'sext .. to' Instruction
      6. -
      7. 'fptrunc .. to' Instruction
      8. -
      9. 'fpext .. to' Instruction
      10. -
      11. 'fptoui .. to' Instruction
      12. -
      13. 'fptosi .. to' Instruction
      14. -
      15. 'uitofp .. to' Instruction
      16. -
      17. 'sitofp .. to' Instruction
      18. -
      19. 'ptrtoint .. to' Instruction
      20. -
      21. 'inttoptr .. to' Instruction
      22. -
      23. 'bitcast .. to' Instruction
      24. -
      -
    14. -
    15. Other Operations -
        -
      1. 'icmp' Instruction
      2. -
      3. 'fcmp' Instruction
      4. -
      5. 'phi' Instruction
      6. -
      7. 'select' Instruction
      8. -
      9. 'call' Instruction
      10. -
      11. 'va_arg' Instruction
      12. -
      13. 'landingpad' Instruction
      14. -
      -
    16. -
    -
  20. -
  21. Intrinsic Functions -
      -
    1. Variable Argument Handling Intrinsics -
        -
      1. 'llvm.va_start' Intrinsic
      2. -
      3. 'llvm.va_end' Intrinsic
      4. -
      5. 'llvm.va_copy' Intrinsic
      6. -
      -
    2. -
    3. Accurate Garbage Collection Intrinsics -
        -
      1. 'llvm.gcroot' Intrinsic
      2. -
      3. 'llvm.gcread' Intrinsic
      4. -
      5. 'llvm.gcwrite' Intrinsic
      6. -
      -
    4. -
    5. Code Generator Intrinsics -
        -
      1. 'llvm.returnaddress' Intrinsic
      2. -
      3. 'llvm.frameaddress' Intrinsic
      4. -
      5. 'llvm.stacksave' Intrinsic
      6. -
      7. 'llvm.stackrestore' Intrinsic
      8. -
      9. 'llvm.prefetch' Intrinsic
      10. -
      11. 'llvm.pcmarker' Intrinsic
      12. -
      13. 'llvm.readcyclecounter' Intrinsic
      14. -
      -
    6. -
    7. Standard C Library Intrinsics -
        -
      1. 'llvm.memcpy.*' Intrinsic
      2. -
      3. 'llvm.memmove.*' Intrinsic
      4. -
      5. 'llvm.memset.*' Intrinsic
      6. -
      7. 'llvm.sqrt.*' Intrinsic
      8. -
      9. 'llvm.powi.*' Intrinsic
      10. -
      11. 'llvm.sin.*' Intrinsic
      12. -
      13. 'llvm.cos.*' Intrinsic
      14. -
      15. 'llvm.pow.*' Intrinsic
      16. -
      17. 'llvm.exp.*' Intrinsic
      18. -
      19. 'llvm.log.*' Intrinsic
      20. -
      21. 'llvm.fma.*' Intrinsic
      22. -
      23. 'llvm.fabs.*' Intrinsic
      24. -
      25. 'llvm.floor.*' Intrinsic
      26. -
      -
    8. -
    9. Bit Manipulation Intrinsics -
        -
      1. 'llvm.bswap.*' Intrinsics
      2. -
      3. 'llvm.ctpop.*' Intrinsic
      4. -
      5. 'llvm.ctlz.*' Intrinsic
      6. -
      7. 'llvm.cttz.*' Intrinsic
      8. -
      -
    10. -
    11. Arithmetic with Overflow Intrinsics -
        -
      1. 'llvm.sadd.with.overflow.*' Intrinsics
      2. -
      3. 'llvm.uadd.with.overflow.*' Intrinsics
      4. -
      5. 'llvm.ssub.with.overflow.*' Intrinsics
      6. -
      7. 'llvm.usub.with.overflow.*' Intrinsics
      8. -
      9. 'llvm.smul.with.overflow.*' Intrinsics
      10. -
      11. 'llvm.umul.with.overflow.*' Intrinsics
      12. -
      -
    12. -
    13. Specialised Arithmetic Intrinsics -
        -
      1. 'llvm.fmuladd' Intrinsic
      2. -
      -
    14. -
    15. Half Precision Floating Point Intrinsics -
        -
      1. 'llvm.convert.to.fp16' Intrinsic
      2. -
      3. 'llvm.convert.from.fp16' Intrinsic
      4. -
      -
    16. -
    17. Debugger intrinsics
    18. -
    19. Exception Handling intrinsics
    20. -
    21. Trampoline Intrinsics -
        -
      1. 'llvm.init.trampoline' Intrinsic
      2. -
      3. 'llvm.adjust.trampoline' Intrinsic
      4. -
      -
    22. -
    23. Memory Use Markers -
        -
      1. 'llvm.lifetime.start' Intrinsic
      2. -
      3. 'llvm.lifetime.end' Intrinsic
      4. -
      5. 'llvm.invariant.start' Intrinsic
      6. -
      7. 'llvm.invariant.end' Intrinsic
      8. -
      -
    24. -
    25. General intrinsics -
        -
      1. - 'llvm.var.annotation' Intrinsic
      2. -
      3. - 'llvm.annotation.*' Intrinsic
      4. -
      5. - 'llvm.trap' Intrinsic
      6. -
      7. - 'llvm.debugtrap' Intrinsic
      8. -
      9. - 'llvm.stackprotector' Intrinsic
      10. -
      11. - 'llvm.objectsize' Intrinsic
      12. -
      13. - 'llvm.expect' Intrinsic
      14. -
      15. - 'llvm.donothing' Intrinsic
      16. -
      -
    26. -
    -
  22. -
- -
-

Written by Chris Lattner - and Vikram Adve

-
- - -

Abstract

- - -
- -

This document is a reference manual for the LLVM assembly language. LLVM is - a Static Single Assignment (SSA) based representation that provides type - safety, low-level operations, flexibility, and the capability of representing - 'all' high-level languages cleanly. It is the common code representation - used throughout all phases of the LLVM compilation strategy.

- -
- - -

Introduction

- - -
- -

The LLVM code representation is designed to be used in three different forms: - as an in-memory compiler IR, as an on-disk bitcode representation (suitable - for fast loading by a Just-In-Time compiler), and as a human readable - assembly language representation. This allows LLVM to provide a powerful - intermediate representation for efficient compiler transformations and - analysis, while providing a natural means to debug and visualize the - transformations. The three different forms of LLVM are all equivalent. This - document describes the human readable representation and notation.

- -

The LLVM representation aims to be light-weight and low-level while being - expressive, typed, and extensible at the same time. It aims to be a - "universal IR" of sorts, by being at a low enough level that high-level ideas - may be cleanly mapped to it (similar to how microprocessors are "universal - IR's", allowing many source languages to be mapped to them). By providing - type information, LLVM can be used as the target of optimizations: for - example, through pointer analysis, it can be proven that a C automatic - variable is never accessed outside of the current function, allowing it to - be promoted to a simple SSA value instead of a memory location.

- - -

- Well-Formedness -

- -
- -

It is important to note that this document describes 'well formed' LLVM - assembly language. There is a difference between what the parser accepts and - what is considered 'well formed'. For example, the following instruction is - syntactically okay, but not well formed:

- -
-%x = add i32 1, %x
-
- -

because the definition of %x does not dominate all of its uses. The - LLVM infrastructure provides a verification pass that may be used to verify - that an LLVM module is well formed. This pass is automatically run by the - parser after parsing input assembly and by the optimizer before it outputs - bitcode. The violations pointed out by the verifier pass indicate bugs in - transformation passes or input to the parser.

- -
- -
- - - - -

Identifiers

- - -
- -

LLVM identifiers come in two basic types: global and local. Global - identifiers (functions, global variables) begin with the '@' - character. Local identifiers (register names, types) begin with - the '%' character. Additionally, there are three different formats - for identifiers, for different purposes:

- -
    -
  1. Named values are represented as a string of characters with their prefix. - For example, %foo, @DivisionByZero, - %a.really.long.identifier. The actual regular expression used is - '[%@][a-zA-Z$._][a-zA-Z$._0-9]*'. Identifiers which require - other characters in their names can be surrounded with quotes. Special - characters may be escaped using "\xx" where xx is the - ASCII code for the character in hexadecimal. In this way, any character - can be used in a name value, even quotes themselves.
  2. - -
  3. Unnamed values are represented as an unsigned numeric value with their - prefix. For example, %12, @2, %44.
  4. - -
  5. Constants, which are described in a section about - constants, below.
  6. -
- -

LLVM requires that values start with a prefix for two reasons: Compilers - don't need to worry about name clashes with reserved words, and the set of - reserved words may be expanded in the future without penalty. Additionally, - unnamed identifiers allow a compiler to quickly come up with a temporary - variable without having to avoid symbol table conflicts.

- -

Reserved words in LLVM are very similar to reserved words in other - languages. There are keywords for different opcodes - ('add', - 'bitcast', - 'ret', etc...), for primitive type names - ('void', - 'i32', etc...), and others. These - reserved words cannot conflict with variable names, because none of them - start with a prefix character ('%' or '@').

- -

Here is an example of LLVM code to multiply the integer variable - '%X' by 8:

- -

The easy way:

- -
-%result = mul i32 %X, 8
-
- -

After strength reduction:

- -
-%result = shl i32 %X, i8 3
-
- -

And the hard way:

- -
-%0 = add i32 %X, %X           ; yields {i32}:%0
-%1 = add i32 %0, %0           ; yields {i32}:%1
-%result = add i32 %1, %1
-
- -

This last way of multiplying %X by 8 illustrates several important - lexical features of LLVM:

- -
    -
  1. Comments are delimited with a ';' and go until the end of - line.
  2. - -
  3. Unnamed temporaries are created when the result of a computation is not - assigned to a named value.
  4. - -
  5. Unnamed temporaries are numbered sequentially
  6. -
- -

It also shows a convention that we follow in this document. When - demonstrating instructions, we will follow an instruction with a comment that - defines the type and name of value produced. Comments are shown in italic - text.

- -
- - -

High Level Structure

- -
- -

- Module Structure -

- -
- -

LLVM programs are composed of Modules, each of which is a - translation unit of the input programs. Each module consists of functions, - global variables, and symbol table entries. Modules may be combined together - with the LLVM linker, which merges function (and global variable) - definitions, resolves forward declarations, and merges symbol table - entries. Here is an example of the "hello world" module:

- -
-; Declare the string constant as a global constant. 
-@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" 
-
-; External declaration of the puts function 
-declare i32 @puts(i8* nocapture) nounwind 
-
-; Definition of main function
-define i32 @main() {   ; i32()*  
-  ; Convert [13 x i8]* to i8  *... 
-  %cast210 = getelementptr [13 x i8]* @.str, i64 0, i64 0
-
-  ; Call puts function to write out the string to stdout. 
-  call i32 @puts(i8* %cast210)
-  ret i32 0 
-}
-
-; Named metadata
-!1 = metadata !{i32 42}
-!foo = !{!1, null}
-
- -

This example is made up of a global variable named - ".str", an external declaration of the "puts" function, - a function definition for - "main" and named metadata - "foo".

- -

In general, a module is made up of a list of global values (where both - functions and global variables are global values). Global values are - represented by a pointer to a memory location (in this case, a pointer to an - array of char, and a pointer to a function), and have one of the - following linkage types.

- -
- - -

- Linkage Types -

- -
- -

All Global Variables and Functions have one of the following types of - linkage:

- -
-
private
-
Global values with "private" linkage are only directly accessible - by objects in the current module. In particular, linking code into a - module with a private global value may cause the private to be renamed as - necessary to avoid collisions. Because the symbol is private to the - module, all references can be updated. This doesn't show up in any symbol - table in the object file.
- -
linker_private
-
Similar to private, but the symbol is passed through the - assembler and evaluated by the linker. Unlike normal strong symbols, they - are removed by the linker from the final linked image (executable or - dynamic library).
- -
linker_private_weak
-
Similar to "linker_private", but the symbol is weak. Note that - linker_private_weak symbols are subject to coalescing by the - linker. The symbols are removed by the linker from the final linked image - (executable or dynamic library).
- -
internal
-
Similar to private, but the value shows as a local symbol - (STB_LOCAL in the case of ELF) in the object file. This - corresponds to the notion of the 'static' keyword in C.
- -
available_externally
-
Globals with "available_externally" linkage are never emitted - into the object file corresponding to the LLVM module. They exist to - allow inlining and other optimizations to take place given knowledge of - the definition of the global, which is known to be somewhere outside the - module. Globals with available_externally linkage are allowed to - be discarded at will, and are otherwise the same as linkonce_odr. - This linkage type is only allowed on definitions, not declarations.
- -
linkonce
-
Globals with "linkonce" linkage are merged with other globals of - the same name when linkage occurs. This can be used to implement - some forms of inline functions, templates, or other code which must be - generated in each translation unit that uses it, but where the body may - be overridden with a more definitive definition later. Unreferenced - linkonce globals are allowed to be discarded. Note that - linkonce linkage does not actually allow the optimizer to - inline the body of this function into callers because it doesn't know if - this definition of the function is the definitive definition within the - program or whether it will be overridden by a stronger definition. - To enable inlining and other optimizations, use "linkonce_odr" - linkage.
- -
weak
-
"weak" linkage has the same merging semantics as - linkonce linkage, except that unreferenced globals with - weak linkage may not be discarded. This is used for globals that - are declared "weak" in C source code.
- -
common
-
"common" linkage is most similar to "weak" linkage, but - they are used for tentative definitions in C, such as "int X;" at - global scope. - Symbols with "common" linkage are merged in the same way as - weak symbols, and they may not be deleted if unreferenced. - common symbols may not have an explicit section, - must have a zero initializer, and may not be marked 'constant'. Functions and aliases may not - have common linkage.
- - -
appending
-
"appending" linkage may only be applied to global variables of - pointer to array type. When two global variables with appending linkage - are linked together, the two global arrays are appended together. This is - the LLVM, typesafe, equivalent of having the system linker append together - "sections" with identical names when .o files are linked.
- -
extern_weak
-
The semantics of this linkage follow the ELF object file model: the symbol - is weak until linked, if not linked, the symbol becomes null instead of - being an undefined reference.
- -
linkonce_odr
-
weak_odr
-
Some languages allow differing globals to be merged, such as two functions - with different semantics. Other languages, such as C++, ensure - that only equivalent globals are ever merged (the "one definition rule" - — "ODR"). Such languages can use the linkonce_odr - and weak_odr linkage types to indicate that the global will only - be merged with equivalent globals. These linkage types are otherwise the - same as their non-odr versions.
- -
linkonce_odr_auto_hide
-
Similar to "linkonce_odr", but nothing in the translation unit - takes the address of this definition. For instance, functions that had an - inline definition, but the compiler decided not to inline it. - linkonce_odr_auto_hide may have only default visibility. - The symbols are removed by the linker from the final linked image - (executable or dynamic library).
- -
external
-
If none of the above identifiers are used, the global is externally - visible, meaning that it participates in linkage and can be used to - resolve external symbol references.
-
- -

The next two types of linkage are targeted for Microsoft Windows platform - only. They are designed to support importing (exporting) symbols from (to) - DLLs (Dynamic Link Libraries).

- -
-
dllimport
-
"dllimport" linkage causes the compiler to reference a function - or variable via a global pointer to a pointer that is set up by the DLL - exporting the symbol. On Microsoft Windows targets, the pointer name is - formed by combining __imp_ and the function or variable - name.
- -
dllexport
-
"dllexport" linkage causes the compiler to provide a global - pointer to a pointer in a DLL, so that it can be referenced with the - dllimport attribute. On Microsoft Windows targets, the pointer - name is formed by combining __imp_ and the function or - variable name.
-
- -

For example, since the ".LC0" variable is defined to be internal, if - another module defined a ".LC0" variable and was linked with this - one, one of the two would be renamed, preventing a collision. Since - "main" and "puts" are external (i.e., lacking any linkage - declarations), they are accessible outside of the current module.

- -

It is illegal for a function declaration to have any linkage type - other than external, dllimport - or extern_weak.

- -

Aliases can have only external, internal, weak - or weak_odr linkages.

- -
- - -

- Calling Conventions -

- -
- -

LLVM functions, calls - and invokes can all have an optional calling - convention specified for the call. The calling convention of any pair of - dynamic caller/callee must match, or the behavior of the program is - undefined. The following calling conventions are supported by LLVM, and more - may be added in the future:

- -
-
"ccc" - The C calling convention:
-
This calling convention (the default if no other calling convention is - specified) matches the target C calling conventions. This calling - convention supports varargs function calls and tolerates some mismatch in - the declared prototype and implemented declaration of the function (as - does normal C).
- -
"fastcc" - The fast calling convention:
-
This calling convention attempts to make calls as fast as possible - (e.g. by passing things in registers). This calling convention allows the - target to use whatever tricks it wants to produce fast code for the - target, without having to conform to an externally specified ABI - (Application Binary Interface). - Tail calls can only be optimized - when this or the GHC convention is used. This calling convention - does not support varargs and requires the prototype of all callees to - exactly match the prototype of the function definition.
- -
"coldcc" - The cold calling convention:
-
This calling convention attempts to make code in the caller as efficient - as possible under the assumption that the call is not commonly executed. - As such, these calls often preserve all registers so that the call does - not break any live ranges in the caller side. This calling convention - does not support varargs and requires the prototype of all callees to - exactly match the prototype of the function definition.
- -
"cc 10" - GHC convention:
-
This calling convention has been implemented specifically for use by the - Glasgow Haskell Compiler (GHC). - It passes everything in registers, going to extremes to achieve this by - disabling callee save registers. This calling convention should not be - used lightly but only for specific situations such as an alternative to - the register pinning performance technique often used when - implementing functional programming languages. At the moment only X86 - supports this convention and it has the following limitations: -
    -
  • On X86-32 only supports up to 4 bit type parameters. No - floating point types are supported.
  • -
  • On X86-64 only supports up to 10 bit type parameters and - 6 floating point parameters.
  • -
- This calling convention supports - tail call optimization but - requires both the caller and callee are using it. -
- -
"cc <n>" - Numbered convention:
-
Any calling convention may be specified by number, allowing - target-specific calling conventions to be used. Target specific calling - conventions start at 64.
-
- -

More calling conventions can be added/defined on an as-needed basis, to - support Pascal conventions or any other well-known target-independent - convention.

- -
- - -

- Visibility Styles -

- -
- -

All Global Variables and Functions have one of the following visibility - styles:

- -
-
"default" - Default style:
-
On targets that use the ELF object file format, default visibility means - that the declaration is visible to other modules and, in shared libraries, - means that the declared entity may be overridden. On Darwin, default - visibility means that the declaration is visible to other modules. Default - visibility corresponds to "external linkage" in the language.
- -
"hidden" - Hidden style:
-
Two declarations of an object with hidden visibility refer to the same - object if they are in the same shared object. Usually, hidden visibility - indicates that the symbol will not be placed into the dynamic symbol - table, so no other module (executable or shared library) can reference it - directly.
- -
"protected" - Protected style:
-
On ELF, protected visibility indicates that the symbol will be placed in - the dynamic symbol table, but that references within the defining module - will bind to the local symbol. That is, the symbol cannot be overridden by - another module.
-
- -
- - -

- Named Types -

- -
- -

LLVM IR allows you to specify name aliases for certain types. This can make - it easier to read the IR and make the IR more condensed (particularly when - recursive types are involved). An example of a name specification is:

- -
-%mytype = type { %mytype*, i32 }
-
- -

You may give a name to any type except - "void". Type name aliases may be used anywhere a type - is expected with the syntax "%mytype".

- -

Note that type names are aliases for the structural type that they indicate, - and that you can therefore specify multiple names for the same type. This - often leads to confusing behavior when dumping out a .ll file. Since LLVM IR - uses structural typing, the name is not part of the type. When printing out - LLVM IR, the printer will pick one name to render all types of a - particular shape. This means that if you have code where two different - source types end up having the same LLVM type, that the dumper will sometimes - print the "wrong" or unexpected type. This is an important design point and - isn't going to change.

- -
- - -

- Global Variables -

- -
- -

Global variables define regions of memory allocated at compilation time - instead of run-time. Global variables may optionally be initialized, may - have an explicit section to be placed in, and may have an optional explicit - alignment specified.

- -

A variable may be defined as thread_local, which - means that it will not be shared by threads (each thread will have a - separate copy of the variable). Not all targets support thread-local - variables. Optionally, a TLS model may be specified:

- -
-
localdynamic:
-
For variables that are only used within the current shared library.
- -
initialexec:
-
For variables in modules that will not be loaded dynamically.
- -
localexec:
-
For variables defined in the executable and only used within it.
-
- -

The models correspond to the ELF TLS models; see - ELF - Handling For Thread-Local Storage for more information on under which - circumstances the different models may be used. The target may choose a - different TLS model if the specified model is not supported, or if a better - choice of model can be made.

- -

A variable may be defined as a global - "constant," which indicates that the contents of the variable - will never be modified (enabling better optimization, allowing the - global data to be placed in the read-only section of an executable, etc). - Note that variables that need runtime initialization cannot be marked - "constant" as there is a store to the variable.

- -

LLVM explicitly allows declarations of global variables to be marked - constant, even if the final definition of the global is not. This capability - can be used to enable slightly better optimization of the program, but - requires the language definition to guarantee that optimizations based on the - 'constantness' are valid for the translation units that do not include the - definition.

- -

As SSA values, global variables define pointer values that are in scope - (i.e. they dominate) all basic blocks in the program. Global variables - always define a pointer to their "content" type because they describe a - region of memory, and all memory objects in LLVM are accessed through - pointers.

- -

Global variables can be marked with unnamed_addr which indicates - that the address is not significant, only the content. Constants marked - like this can be merged with other constants if they have the same - initializer. Note that a constant with a significant address can - be merged with an unnamed_addr constant, the result being a - constant whose address is significant.

- -

A global variable may be declared to reside in a target-specific numbered - address space. For targets that support them, address spaces may affect how - optimizations are performed and/or what target instructions are used to - access the variable. The default address space is zero. The address space - qualifier must precede any other attributes.

- -

LLVM allows an explicit section to be specified for globals. If the target - supports it, it will emit globals to the section specified.

- -

An explicit alignment may be specified for a global, which must be a power - of 2. If not present, or if the alignment is set to zero, the alignment of - the global is set by the target to whatever it feels convenient. If an - explicit alignment is specified, the global is forced to have exactly that - alignment. Targets and optimizers are not allowed to over-align the global - if the global has an assigned section. In this case, the extra alignment - could be observable: for example, code could assume that the globals are - densely packed in their section and try to iterate over them as an array, - alignment padding would break this iteration.

- -

For example, the following defines a global in a numbered address space with - an initializer, section, and alignment:

- -
-@G = addrspace(5) constant float 1.0, section "foo", align 4
-
- -

The following example defines a thread-local global with - the initialexec TLS model:

- -
-@G = thread_local(initialexec) global i32 0, align 4
-
- -
- - - -

- Functions -

- -
- -

LLVM function definitions consist of the "define" keyword, an - optional linkage type, an optional - visibility style, an optional - calling convention, - an optional unnamed_addr attribute, a return type, an optional - parameter attribute for the return type, a function - name, a (possibly empty) argument list (each with optional - parameter attributes), optional - function attributes, an optional section, an optional - alignment, an optional garbage collector name, an opening - curly brace, a list of basic blocks, and a closing curly brace.

- -

LLVM function declarations consist of the "declare" keyword, an - optional linkage type, an optional - visibility style, an optional - calling convention, - an optional unnamed_addr attribute, a return type, an optional - parameter attribute for the return type, a function - name, a possibly empty list of arguments, an optional alignment, and an - optional garbage collector name.

- -

A function definition contains a list of basic blocks, forming the CFG - (Control Flow Graph) for the function. Each basic block may optionally start - with a label (giving the basic block a symbol table entry), contains a list - of instructions, and ends with a terminator - instruction (such as a branch or function return).

- -

The first basic block in a function is special in two ways: it is immediately - executed on entrance to the function, and it is not allowed to have - predecessor basic blocks (i.e. there can not be any branches to the entry - block of a function). Because the block can have no predecessors, it also - cannot have any PHI nodes.

- -

LLVM allows an explicit section to be specified for functions. If the target - supports it, it will emit functions to the section specified.

- -

An explicit alignment may be specified for a function. If not present, or if - the alignment is set to zero, the alignment of the function is set by the - target to whatever it feels convenient. If an explicit alignment is - specified, the function is forced to have at least that much alignment. All - alignments must be a power of 2.

- -

If the unnamed_addr attribute is given, the address is known to not - be significant and two identical functions can be merged.

- -
Syntax:
-
-define [linkage] [visibility]
-       [cconv] [ret attrs]
-       <ResultType> @<FunctionName> ([argument list])
-       [fn Attrs] [section "name"] [align N]
-       [gc] { ... }
-
- -
- - -

- Aliases -

- -
- -

Aliases act as "second name" for the aliasee value (which can be either - function, global variable, another alias or bitcast of global value). Aliases - may have an optional linkage type, and an - optional visibility style.

- -
Syntax:
-
-@<Name> = alias [Linkage] [Visibility] <AliaseeTy> @<Aliasee>
-
- -
- - -

- Named Metadata -

- -
- -

Named metadata is a collection of metadata. Metadata - nodes (but not metadata strings) are the only valid operands for - a named metadata.

- -
Syntax:
-
-; Some unnamed metadata nodes, which are referenced by the named metadata.
-!0 = metadata !{metadata !"zero"}
-!1 = metadata !{metadata !"one"}
-!2 = metadata !{metadata !"two"}
-; A named metadata.
-!name = !{!0, !1, !2}
-
- -
- - -

- Parameter Attributes -

- -
- -

The return type and each parameter of a function type may have a set of - parameter attributes associated with them. Parameter attributes are - used to communicate additional information about the result or parameters of - a function. Parameter attributes are considered to be part of the function, - not of the function type, so functions with different parameter attributes - can have the same function type.

- -

Parameter attributes are simple keywords that follow the type specified. If - multiple parameter attributes are needed, they are space separated. For - example:

- -
-declare i32 @printf(i8* noalias nocapture, ...)
-declare i32 @atoi(i8 zeroext)
-declare signext i8 @returns_signed_char()
-
- -

Note that any attributes for the function result (nounwind, - readonly) come immediately after the argument list.

- -

Currently, only the following parameter attributes are defined:

- -
-
zeroext
-
This indicates to the code generator that the parameter or return value - should be zero-extended to the extent required by the target's ABI (which - is usually 32-bits, but is 8-bits for an i1 on x86-64) by the caller (for a - parameter) or the callee (for a return value).
- -
signext
-
This indicates to the code generator that the parameter or return value - should be sign-extended to the extent required by the target's ABI (which - is usually 32-bits) by the caller (for a parameter) or the callee (for a - return value).
- -
inreg
-
This indicates that this parameter or return value should be treated in a - special target-dependent fashion while emitting code for a function - call or return (usually, by putting it in a register as opposed to memory, - though some targets use it to distinguish between two different kinds of - registers). Use of this attribute is target-specific.
- -
byval
-

This indicates that the pointer parameter should really be passed by - value to the function. The attribute implies that a hidden copy of the - pointee - is made between the caller and the callee, so the callee is unable to - modify the value in the caller. This attribute is only valid on LLVM - pointer arguments. It is generally used to pass structs and arrays by - value, but is also valid on pointers to scalars. The copy is considered - to belong to the caller not the callee (for example, - readonly functions should not write to - byval parameters). This is not a valid attribute for return - values.

- -

The byval attribute also supports specifying an alignment with - the align attribute. It indicates the alignment of the stack slot to - form and the known alignment of the pointer specified to the call site. If - the alignment is not specified, then the code generator makes a - target-specific assumption.

- -
sret
-
This indicates that the pointer parameter specifies the address of a - structure that is the return value of the function in the source program. - This pointer must be guaranteed by the caller to be valid: loads and - stores to the structure may be assumed by the callee to not trap and - to be properly aligned. This may only be applied to the first parameter. - This is not a valid attribute for return values.
- -
noalias
-
This indicates that pointer values - based on the argument or return - value do not alias pointer values which are not based on it, - ignoring certain "irrelevant" dependencies. - For a call to the parent function, dependencies between memory - references from before or after the call and from those during the call - are "irrelevant" to the noalias keyword for the arguments and - return value used in that call. - The caller shares the responsibility with the callee for ensuring that - these requirements are met. - For further details, please see the discussion of the NoAlias response in - alias analysis.
-
- Note that this definition of noalias is intentionally - similar to the definition of restrict in C99 for function - arguments, though it is slightly weaker. -
- For function return values, C99's restrict is not meaningful, - while LLVM's noalias is. -
- -
nocapture
-
This indicates that the callee does not make any copies of the pointer - that outlive the callee itself. This is not a valid attribute for return - values.
- -
nest
-
This indicates that the pointer parameter can be excised using the - trampoline intrinsics. This is not a valid - attribute for return values.
-
- -
- - -

- Garbage Collector Names -

- -
- -

Each function may specify a garbage collector name, which is simply a - string:

- -
-define void @f() gc "name" { ... }
-
- -

The compiler declares the supported values of name. Specifying a - collector will cause the compiler to alter its output in order to - support the named garbage collection algorithm.

- -
- - -

- Function Attributes -

- -
- -

Function attributes are set to communicate additional information about a - function. Function attributes are considered to be part of the function, not - of the function type, so functions with different function attributes can - have the same function type.

- -

Function attributes are simple keywords that follow the type specified. If - multiple attributes are needed, they are space separated. For example:

- -
-define void @f() noinline { ... }
-define void @f() alwaysinline { ... }
-define void @f() alwaysinline optsize { ... }
-define void @f() optsize { ... }
-
- -
-
address_safety
-
This attribute indicates that the address safety analysis - is enabled for this function.
- -
alignstack(<n>)
-
This attribute indicates that, when emitting the prologue and epilogue, - the backend should forcibly align the stack pointer. Specify the - desired alignment, which must be a power of two, in parentheses. - -
alwaysinline
-
This attribute indicates that the inliner should attempt to inline this - function into callers whenever possible, ignoring any active inlining size - threshold for this caller.
- -
nonlazybind
-
This attribute suppresses lazy symbol binding for the function. This - may make calls to the function faster, at the cost of extra program - startup time if the function is not called during program startup.
- -
inlinehint
-
This attribute indicates that the source code contained a hint that inlining - this function is desirable (such as the "inline" keyword in C/C++). It - is just a hint; it imposes no requirements on the inliner.
- -
naked
-
This attribute disables prologue / epilogue emission for the function. - This can have very system-specific consequences.
- -
noimplicitfloat
-
This attribute disables implicit floating point instructions.
- -
noinline
-
This attribute indicates that the inliner should never inline this - function in any situation. This attribute may not be used together with - the alwaysinline attribute.
- -
noredzone
-
This attribute indicates that the code generator should not use a red - zone, even if the target-specific ABI normally permits it.
- -
noreturn
-
This function attribute indicates that the function never returns - normally. This produces undefined behavior at runtime if the function - ever does dynamically return.
- -
nounwind
-
This function attribute indicates that the function never returns with an - unwind or exceptional control flow. If the function does unwind, its - runtime behavior is undefined.
- -
optsize
-
This attribute suggests that optimization passes and code generator passes - make choices that keep the code size of this function low, and otherwise - do optimizations specifically to reduce code size.
- -
readnone
-
This attribute indicates that the function computes its result (or decides - to unwind an exception) based strictly on its arguments, without - dereferencing any pointer arguments or otherwise accessing any mutable - state (e.g. memory, control registers, etc) visible to caller functions. - It does not write through any pointer arguments - (including byval arguments) and never - changes any state visible to callers. This means that it cannot unwind - exceptions by calling the C++ exception throwing methods.
- -
readonly
-
This attribute indicates that the function does not write through any - pointer arguments (including byval - arguments) or otherwise modify any state (e.g. memory, control registers, - etc) visible to caller functions. It may dereference pointer arguments - and read state that may be set in the caller. A readonly function always - returns the same value (or unwinds an exception identically) when called - with the same set of arguments and global state. It cannot unwind an - exception by calling the C++ exception throwing methods.
- -
returns_twice
-
This attribute indicates that this function can return twice. The - C setjmp is an example of such a function. The compiler - disables some optimizations (like tail calls) in the caller of these - functions.
- -
ssp
-
This attribute indicates that the function should emit a stack smashing - protector. It is in the form of a "canary"—a random value placed on - the stack before the local variables that's checked upon return from the - function to see if it has been overwritten. A heuristic is used to - determine if a function needs stack protectors or not.
-
- If a function that has an ssp attribute is inlined into a - function that doesn't have an ssp attribute, then the resulting - function will have an ssp attribute.
- -
sspreq
-
This attribute indicates that the function should always emit a - stack smashing protector. This overrides - the ssp function attribute.
-
- If a function that has an sspreq attribute is inlined into a - function that doesn't have an sspreq attribute or which has - an ssp attribute, then the resulting function will have - an sspreq attribute.
- -
uwtable
-
This attribute indicates that the ABI being targeted requires that - an unwind table entry be produced for this function even if we can - show that no exceptions pass by it. This is normally the case for - the ELF x86-64 ABI, but it can be disabled for some compilation - units.
-
- -
- - -

- Module-Level Inline Assembly -

- -
- -

Modules may contain "module-level inline asm" blocks, which correspond to - the GCC "file scope inline asm" blocks. These blocks are internally - concatenated by LLVM and treated as a single unit, but may be separated in - the .ll file if desired. The syntax is very simple:

- -
-module asm "inline asm code goes here"
-module asm "more can go here"
-
- -

The strings can contain any character by escaping non-printable characters. - The escape sequence used is simply "\xx" where "xx" is the two digit hex code - for the number.

- -

The inline asm code is simply printed to the machine code .s file when - assembly code is generated.

- -
- - -

- Data Layout -

- -
- -

A module may specify a target specific data layout string that specifies how - data is to be laid out in memory. The syntax for the data layout is - simply:

- -
-target datalayout = "layout specification"
-
- -

The layout specification consists of a list of specifications - separated by the minus sign character ('-'). Each specification starts with - a letter and may include other information after the letter to define some - aspect of the data layout. The specifications accepted are as follows:

- -
-
E
-
Specifies that the target lays out data in big-endian form. That is, the - bits with the most significance have the lowest address location.
- -
e
-
Specifies that the target lays out data in little-endian form. That is, - the bits with the least significance have the lowest address - location.
- -
Ssize
-
Specifies the natural alignment of the stack in bits. Alignment promotion - of stack variables is limited to the natural stack alignment to avoid - dynamic stack realignment. The stack alignment must be a multiple of - 8-bits. If omitted, the natural stack alignment defaults to "unspecified", - which does not prevent any alignment promotions.
- -
p[n]:size:abi:pref
-
This specifies the size of a pointer and its abi and - preferred alignments for address space n. All sizes are in - bits. Specifying the pref alignment is optional. If omitted, the - preceding : should be omitted too. The address space, - n is optional, and if not specified, denotes the default address - space 0. The value of n must be in the range [1,2^23).
- -
isize:abi:pref
-
This specifies the alignment for an integer type of a given bit - size. The value of size must be in the range [1,2^23).
- -
vsize:abi:pref
-
This specifies the alignment for a vector type of a given bit - size.
- -
fsize:abi:pref
-
This specifies the alignment for a floating point type of a given bit - size. Only values of size that are supported by the target - will work. 32 (float) and 64 (double) are supported on all targets; - 80 or 128 (different flavors of long double) are also supported on some - targets. - -
asize:abi:pref
-
This specifies the alignment for an aggregate type of a given bit - size.
- -
ssize:abi:pref
-
This specifies the alignment for a stack object of a given bit - size.
- -
nsize1:size2:size3...
-
This specifies a set of native integer widths for the target CPU - in bits. For example, it might contain "n32" for 32-bit PowerPC, - "n32:64" for PowerPC 64, or "n8:16:32:64" for X86-64. Elements of - this set are considered to support most general arithmetic - operations efficiently.
-
- -

When constructing the data layout for a given target, LLVM starts with a - default set of specifications which are then (possibly) overridden by the - specifications in the datalayout keyword. The default specifications - are given in this list:

- -
    -
  • E - big endian
  • -
  • p:64:64:64 - 64-bit pointers with 64-bit alignment
  • -
  • p1:32:32:32 - 32-bit pointers with 32-bit alignment for - address space 1
  • -
  • p2:16:32:32 - 16-bit pointers with 32-bit alignment for - address space 2
  • -
  • i1:8:8 - i1 is 8-bit (byte) aligned
  • -
  • i8:8:8 - i8 is 8-bit (byte) aligned
  • -
  • i16:16:16 - i16 is 16-bit aligned
  • -
  • i32:32:32 - i32 is 32-bit aligned
  • -
  • i64:32:64 - i64 has ABI alignment of 32-bits but preferred - alignment of 64-bits
  • -
  • f32:32:32 - float is 32-bit aligned
  • -
  • f64:64:64 - double is 64-bit aligned
  • -
  • v64:64:64 - 64-bit vector is 64-bit aligned
  • -
  • v128:128:128 - 128-bit vector is 128-bit aligned
  • -
  • a0:0:1 - aggregates are 8-bit aligned
  • -
  • s0:64:64 - stack objects are 64-bit aligned
  • -
- -

When LLVM is determining the alignment for a given type, it uses the - following rules:

- -
    -
  1. If the type sought is an exact match for one of the specifications, that - specification is used.
  2. - -
  3. If no match is found, and the type sought is an integer type, then the - smallest integer type that is larger than the bitwidth of the sought type - is used. If none of the specifications are larger than the bitwidth then - the largest integer type is used. For example, given the default - specifications above, the i7 type will use the alignment of i8 (next - largest) while both i65 and i256 will use the alignment of i64 (largest - specified).
  4. - -
  5. If no match is found, and the type sought is a vector type, then the - largest vector type that is smaller than the sought vector type will be - used as a fall back. This happens because <128 x double> can be - implemented in terms of 64 <2 x double>, for example.
  6. -
- -

The function of the data layout string may not be what you expect. Notably, - this is not a specification from the frontend of what alignment the code - generator should use.

- -

Instead, if specified, the target data layout is required to match what the - ultimate code generator expects. This string is used by the - mid-level optimizers to - improve code, and this only works if it matches what the ultimate code - generator uses. If you would like to generate IR that does not embed this - target-specific detail into the IR, then you don't have to specify the - string. This will disable some optimizations that require precise layout - information, but this also prevents those optimizations from introducing - target specificity into the IR.

- - - -
- - -

- Pointer Aliasing Rules -

- -
- -

Any memory access must be done through a pointer value associated -with an address range of the memory access, otherwise the behavior -is undefined. Pointer values are associated with address ranges -according to the following rules:

- -
    -
  • A pointer value is associated with the addresses associated with - any value it is based on. -
  • An address of a global variable is associated with the address - range of the variable's storage.
  • -
  • The result value of an allocation instruction is associated with - the address range of the allocated storage.
  • -
  • A null pointer in the default address-space is associated with - no address.
  • -
  • An integer constant other than zero or a pointer value returned - from a function not defined within LLVM may be associated with address - ranges allocated through mechanisms other than those provided by - LLVM. Such ranges shall not overlap with any ranges of addresses - allocated by mechanisms provided by LLVM.
  • -
- -

A pointer value is based on another pointer value according - to the following rules:

- -
    -
  • A pointer value formed from a - getelementptr operation - is based on the first operand of the getelementptr.
  • -
  • The result value of a - bitcast is based on the operand - of the bitcast.
  • -
  • A pointer value formed by an - inttoptr is based on all - pointer values that contribute (directly or indirectly) to the - computation of the pointer's value.
  • -
  • The "based on" relationship is transitive.
  • -
- -

Note that this definition of "based" is intentionally - similar to the definition of "based" in C99, though it is - slightly weaker.

- -

LLVM IR does not associate types with memory. The result type of a -load merely indicates the size and -alignment of the memory from which to load, as well as the -interpretation of the value. The first operand type of a -store similarly only indicates the size -and alignment of the store.

- -

Consequently, type-based alias analysis, aka TBAA, aka --fstrict-aliasing, is not applicable to general unadorned -LLVM IR. Metadata may be used to encode -additional information which specialized optimization passes may use -to implement type-based alias analysis.

- -
- - -

- Volatile Memory Accesses -

- -
- -

Certain memory accesses, such as loads, stores, and llvm.memcpys may be marked volatile. -The optimizers must not change the number of volatile operations or change their -order of execution relative to other volatile operations. The optimizers -may change the order of volatile operations relative to non-volatile -operations. This is not Java's "volatile" and has no cross-thread -synchronization behavior.

- -
- - -

- Memory Model for Concurrent Operations -

- -
- -

The LLVM IR does not define any way to start parallel threads of execution -or to register signal handlers. Nonetheless, there are platform-specific -ways to create them, and we define LLVM IR's behavior in their presence. This -model is inspired by the C++0x memory model.

- -

For a more informal introduction to this model, see the -LLVM Atomic Instructions and Concurrency Guide. - -

We define a happens-before partial order as the least partial order -that

-
    -
  • Is a superset of single-thread program order, and
  • -
  • When a synchronizes-with b, includes an edge from - a to b. Synchronizes-with pairs are introduced - by platform-specific techniques, like pthread locks, thread - creation, thread joining, etc., and by atomic instructions. - (See also Atomic Memory Ordering Constraints). -
  • -
- -

Note that program order does not introduce happens-before edges -between a thread and signals executing inside that thread.

- -

Every (defined) read operation (load instructions, memcpy, atomic -loads/read-modify-writes, etc.) R reads a series of bytes written by -(defined) write operations (store instructions, atomic -stores/read-modify-writes, memcpy, etc.). For the purposes of this section, -initialized globals are considered to have a write of the initializer which is -atomic and happens before any other read or write of the memory in question. -For each byte of a read R, Rbyte may see -any write to the same byte, except:

- -
    -
  • If write1 happens before - write2, and write2 happens - before Rbyte, then Rbyte - does not see write1. -
  • If Rbyte happens before - write3, then Rbyte does not - see write3. -
- -

Given that definition, Rbyte is defined as follows: -

    -
  • If R is volatile, the result is target-dependent. (Volatile - is supposed to give guarantees which can support - sig_atomic_t in C/C++, and may be used for accesses to - addresses which do not behave like normal memory. It does not generally - provide cross-thread synchronization.) -
  • Otherwise, if there is no write to the same byte that happens before - Rbyte, Rbyte returns - undef for that byte. -
  • Otherwise, if Rbyte may see exactly one write, - Rbyte returns the value written by that - write.
  • -
  • Otherwise, if R is atomic, and all the writes - Rbyte may see are atomic, it chooses one of the - values written. See the Atomic Memory Ordering - Constraints section for additional constraints on how the choice - is made. -
  • Otherwise Rbyte returns undef.
  • -
- -

R returns the value composed of the series of bytes it read. -This implies that some bytes within the value may be undef -without the entire value being undef. Note that this only -defines the semantics of the operation; it doesn't mean that targets will -emit more than one instruction to read the series of bytes.

- -

Note that in cases where none of the atomic intrinsics are used, this model -places only one restriction on IR transformations on top of what is required -for single-threaded execution: introducing a store to a byte which might not -otherwise be stored is not allowed in general. (Specifically, in the case -where another thread might write to and read from an address, introducing a -store can change a load that may see exactly one write into a load that may -see multiple writes.)

- - - -
- - -

- Atomic Memory Ordering Constraints -

- -
- -

Atomic instructions (cmpxchg, -atomicrmw, -fence, -atomic load, and -atomic store) take an ordering parameter -that determines which other atomic instructions on the same address they -synchronize with. These semantics are borrowed from Java and C++0x, -but are somewhat more colloquial. If these descriptions aren't precise enough, -check those specs (see spec references in the -atomics guide). -fence instructions -treat these orderings somewhat differently since they don't take an address. -See that instruction's documentation for details.

- -

For a simpler introduction to the ordering constraints, see the -LLVM Atomic Instructions and Concurrency Guide.

- -
-
unordered
-
The set of values that can be read is governed by the happens-before -partial order. A value cannot be read unless some operation wrote it. -This is intended to provide a guarantee strong enough to model Java's -non-volatile shared variables. This ordering cannot be specified for -read-modify-write operations; it is not strong enough to make them atomic -in any interesting way.
-
monotonic
-
In addition to the guarantees of unordered, there is a single -total order for modifications by monotonic operations on each -address. All modification orders must be compatible with the happens-before -order. There is no guarantee that the modification orders can be combined to -a global total order for the whole program (and this often will not be -possible). The read in an atomic read-modify-write operation -(cmpxchg and -atomicrmw) -reads the value in the modification order immediately before the value it -writes. If one atomic read happens before another atomic read of the same -address, the later read must see the same value or a later value in the -address's modification order. This disallows reordering of -monotonic (or stronger) operations on the same address. If an -address is written monotonically by one thread, and other threads -monotonically read that address repeatedly, the other threads must -eventually see the write. This corresponds to the C++0x/C1x -memory_order_relaxed.
-
acquire
-
In addition to the guarantees of monotonic, -a synchronizes-with edge may be formed with a release -operation. This is intended to model C++'s memory_order_acquire.
-
release
-
In addition to the guarantees of monotonic, if this operation -writes a value which is subsequently read by an acquire operation, -it synchronizes-with that operation. (This isn't a complete -description; see the C++0x definition of a release sequence.) This corresponds -to the C++0x/C1x memory_order_release.
-
acq_rel (acquire+release)
Acts as both an -acquire and release operation on its address. -This corresponds to the C++0x/C1x memory_order_acq_rel.
-
seq_cst (sequentially consistent)
-
In addition to the guarantees of acq_rel -(acquire for an operation which only reads, release -for an operation which only writes), there is a global total order on all -sequentially-consistent operations on all addresses, which is consistent with -the happens-before partial order and with the modification orders of -all the affected addresses. Each sequentially-consistent read sees the last -preceding write to the same address in this global order. This corresponds -to the C++0x/C1x memory_order_seq_cst and Java volatile.
-
- -

If an atomic operation is marked singlethread, -it only synchronizes with or participates in modification and seq_cst -total orderings with other operations running in the same thread (for example, -in signal handlers).

- -
- -
- - -

Type System

- - -
- -

The LLVM type system is one of the most important features of the - intermediate representation. Being typed enables a number of optimizations - to be performed on the intermediate representation directly, without having - to do extra analyses on the side before the transformation. A strong type - system makes it easier to read the generated code and enables novel analyses - and transformations that are not feasible to perform on normal three address - code representations.

- - -

- Type Classifications -

- -
- -

The types fall into a few useful classifications:

- - - - - - - - - - - - - - - - - - - - - - - - - -
ClassificationTypes
integeri1, i2, i3, ... i8, ... i16, ... i32, ... i64, ...
floating pointhalf, float, double, x86_fp80, fp128, ppc_fp128
first classinteger, - floating point, - pointer, - vector, - structure, - array, - label, - metadata. -
primitivelabel, - void, - integer, - floating point, - x86mmx, - metadata.
derivedarray, - function, - pointer, - structure, - vector, - opaque. -
- -

The first class types are perhaps the most - important. Values of these types are the only ones which can be produced by - instructions.

- -
- - -

- Primitive Types -

- -
- -

The primitive types are the fundamental building blocks of the LLVM - system.

- - -

- Integer Type -

- -
- -
Overview:
-

The integer type is a very simple type that simply specifies an arbitrary - bit width for the integer type desired. Any bit width from 1 bit to - 2^23-1 (about 8 million) can be specified.

- -
Syntax:
-
-  iN
-
- -

The number of bits the integer will occupy is specified by the N - value.

- -
Examples:
- - - - - - - - - - - - - -
i1a single-bit integer.
i32a 32-bit integer.
i1942652a really big integer of over 1 million bits.
- -
- - -

- Floating Point Types -

- -
- - - - - - - - - - - -
TypeDescription
half16-bit floating point value
float32-bit floating point value
double64-bit floating point value
fp128128-bit floating point value (112-bit mantissa)
x86_fp8080-bit floating point value (X87)
ppc_fp128128-bit floating point value (two 64-bits)
- -
- - -

- X86mmx Type -

- -
- -
Overview:
-

The x86mmx type represents a value held in an MMX register on an x86 machine. The operations allowed on it are quite limited: parameters and return values, load and store, and bitcast. User-specified MMX instructions are represented as intrinsic or asm calls with arguments and/or results of this type. There are no arrays, vectors or constants of this type.

- -
Syntax:
-
-  x86mmx
-
- -
- - -

- Void Type -

- -
- -
Overview:
-

The void type does not represent any value and has no size.

- -
Syntax:
-
-  void
-
- -
- - -

- Label Type -

- -
- -
Overview:
-

The label type represents code labels.

- -
Syntax:
-
-  label
-
- -
- - -

- Metadata Type -

- -
- -
Overview:
-

The metadata type represents embedded metadata. No derived types may be - created from metadata except for function - arguments. - -

Syntax:
-
-  metadata
-
- -
- -
- - -

- Derived Types -

- -
- -

The real power in LLVM comes from the derived types in the system. This is - what allows a programmer to represent arrays, functions, pointers, and other - useful types. Each of these types contains one or more element types which - may be a primitive type, or another derived type. For example, it is - possible to have a two dimensional array, using an array as the element type - of another array.

- - -

- Aggregate Types -

- -
- -

Aggregate Types are a subset of derived types that can contain multiple - member types. Arrays and - structs are aggregate types. - Vectors are not considered to be aggregate types.

- -
- - -

- Array Type -

- -
- -
Overview:
-

The array type is a very simple derived type that arranges elements - sequentially in memory. The array type requires a size (number of elements) - and an underlying data type.

- -
Syntax:
-
-  [<# elements> x <elementtype>]
-
- -

The number of elements is a constant integer value; elementtype may - be any type with a size.

- -
Examples:
- - - - - - - - - - - - - -
[40 x i32]Array of 40 32-bit integer values.
[41 x i32]Array of 41 32-bit integer values.
[4 x i8]Array of 4 8-bit integer values.
-

Here are some examples of multidimensional arrays:

- - - - - - - - - - - - - -
[3 x [4 x i32]]3x4 array of 32-bit integer values.
[12 x [10 x float]]12x10 array of single precision floating point values.
[2 x [3 x [4 x i16]]]2x3x4 array of 16-bit integer values.
- -

There is no restriction on indexing beyond the end of the array implied by - a static type (though there are restrictions on indexing beyond the bounds - of an allocated object in some cases). This means that single-dimension - 'variable sized array' addressing can be implemented in LLVM with a zero - length array type. An implementation of 'pascal style arrays' in LLVM could - use the type "{ i32, [0 x float]}", for example.

- -
- - -

- Function Type -

- -
- -
Overview:
-

The function type can be thought of as a function signature. It consists of - a return type and a list of formal parameter types. The return type of a - function type is a first class type or a void type.

- -
Syntax:
-
-  <returntype> (<parameter list>)
-
- -

...where '<parameter list>' is a comma-separated list of type - specifiers. Optionally, the parameter list may include a type ..., - which indicates that the function takes a variable number of arguments. - Variable argument functions can access their arguments with - the variable argument handling intrinsic - functions. '<returntype>' is any type except - label.

- -
Examples:
- - - - - - - - - - - - - - -
i32 (i32)function taking an i32, returning an i32 -
float (i16, i32 *) * - Pointer to a function that takes - an i16 and a pointer to i32, - returning float. -
i32 (i8*, ...)A vararg function that takes at least one - pointer to i8 (char in C), - which returns an integer. This is the signature for printf in - LLVM. -
{i32, i32} (i32)A function taking an i32, returning a - structure containing two i32 values -
- -
- - -

- Structure Type -

- -
- -
Overview:
-

The structure type is used to represent a collection of data members together - in memory. The elements of a structure may be any type that has a size.

- -

Structures in memory are accessed using 'load' - and 'store' by getting a pointer to a field - with the 'getelementptr' instruction. - Structures in registers are accessed using the - 'extractvalue' and - 'insertvalue' instructions.

- -

Structures may optionally be "packed" structures, which indicate that the - alignment of the struct is one byte, and that there is no padding between - the elements. In non-packed structs, padding between field types is inserted - as defined by the DataLayout string in the module, which is required to match - what the underlying code generator expects.

- -

Structures can either be "literal" or "identified". A literal structure is - defined inline with other types (e.g. {i32, i32}*) whereas identified - types are always defined at the top level with a name. Literal types are - uniqued by their contents and can never be recursive or opaque since there is - no way to write one. Identified types can be recursive, can be opaqued, and are - never uniqued. -

- -
Syntax:
-
-  %T1 = type { <type list> }     ; Identified normal struct type
-  %T2 = type <{ <type list> }>   ; Identified packed struct type
-
- -
Examples:
- - - - - - - - - - - - - -
{ i32, i32, i32 }A triple of three i32 values
{ float, i32 (i32) * }A pair, where the first element is a float and the - second element is a pointer to a - function that takes an i32, returning - an i32.
<{ i8, i32 }>A packed struct known to be 5 bytes in size.
- -
- - -

- Opaque Structure Types -

- -
- -
Overview:
-

Opaque structure types are used to represent named structure types that do - not have a body specified. This corresponds (for example) to the C notion of - a forward declared structure.

- -
Syntax:
-
-  %X = type opaque
-  %52 = type opaque
-
- -
Examples:
- - - - - -
opaqueAn opaque type.
- -
- - - - -

- Pointer Type -

- -
- -
Overview:
-

The pointer type is used to specify memory locations. - Pointers are commonly used to reference objects in memory.

- -

Pointer types may have an optional address space attribute defining the - numbered address space where the pointed-to object resides. The default - address space is number zero. The semantics of non-zero address - spaces are target-specific.

- -

Note that LLVM does not permit pointers to void (void*) nor does it - permit pointers to labels (label*). Use i8* instead.

- -
Syntax:
-
-  <type> *
-
- -
Examples:
- - - - - - - - - - - - - -
[4 x i32]*A pointer to array of four i32 values.
i32 (i32*) * A pointer to a function that takes an i32*, returning an - i32.
i32 addrspace(5)*A pointer to an i32 value - that resides in address space #5.
- -
- - -

- Vector Type -

- -
- -
Overview:
-

A vector type is a simple derived type that represents a vector of elements. - Vector types are used when multiple primitive data are operated on in parallel - using a single instruction (SIMD). A vector type requires a size (number of - elements) and an underlying primitive data type. Vector types are considered - first class.

- -
Syntax:
-
-  < <# elements> x <elementtype> >
-
- -

The number of elements is a constant integer value larger than 0; elementtype - may be any integer or floating point type, or a pointer to these types. - Vectors of size zero are not allowed.

- -
Examples:
- - - - - - - - - - - - - - - - - -
<4 x i32>Vector of 4 32-bit integer values.
<8 x float>Vector of 8 32-bit floating-point values.
<2 x i64>Vector of 2 64-bit integer values.
<4 x i64*>Vector of 4 pointers to 64-bit integer values.
- -
- -
- -
- - -

Constants

- - -
- -

LLVM has several different basic types of constants. This section describes - them all and their syntax.

- - -

- Simple Constants -

- -
- -
-
Boolean constants
-
The two strings 'true' and 'false' are both valid - constants of the i1 type.
- -
Integer constants
-
Standard integers (such as '4') are constants of - the integer type. Negative numbers may be used - with integer types.
- -
Floating point constants
-
Floating point constants use standard decimal notation (e.g. 123.421), - exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal - notation (see below). The assembler requires the exact decimal value of a - floating-point constant. For example, the assembler accepts 1.25 but - rejects 1.3 because 1.3 is a repeating decimal in binary. Floating point - constants must have a floating point type.
- -
Null pointer constants
-
The identifier 'null' is recognized as a null pointer constant - and must be of pointer type.
-
- -

The one non-intuitive notation for constants is the hexadecimal form of - floating point constants. For example, the form 'double - 0x432ff973cafa8000' is equivalent to (but harder to read than) - 'double 4.5e+15'. The only time hexadecimal floating point - constants are required (and the only time that they are generated by the - disassembler) is when a floating point constant must be emitted but it cannot - be represented as a decimal floating point number in a reasonable number of - digits. For example, NaN's, infinities, and other special values are - represented in their IEEE hexadecimal format so that assembly and disassembly - do not cause any bits to change in the constants.

- -

When using the hexadecimal form, constants of types half, float, and double are - represented using the 16-digit form shown above (which matches the IEEE754 - representation for double); half and float values must, however, be exactly - representable as IEEE754 half and single precision, respectively. - Hexadecimal format is always used - for long double, and there are three forms of long double. The 80-bit format - used by x86 is represented as 0xK followed by 20 hexadecimal digits. - The 128-bit format used by PowerPC (two adjacent doubles) is represented - by 0xM followed by 32 hexadecimal digits. The IEEE 128-bit format - is represented by 0xL followed by 32 hexadecimal digits; no - currently supported target uses this format. Long doubles will only work if - they match the long double format on your target. The IEEE 16-bit format - (half precision) is represented by 0xH followed by 4 hexadecimal - digits. All hexadecimal formats are big-endian (sign bit at the left).

- -

There are no constants of type x86mmx.

-
- - -

- -Complex Constants -

- -
- -

Complex constants are a (potentially recursive) combination of simple - constants and smaller complex constants.

- -
-
Structure constants
-
Structure constants are represented with notation similar to structure - type definitions (a comma separated list of elements, surrounded by braces - ({})). For example: "{ i32 4, float 17.0, i32* @G }", - where "@G" is declared as "@G = external global i32". - Structure constants must have structure type, and - the number and types of elements must match those specified by the - type.
- -
Array constants
-
Array constants are represented with notation similar to array type - definitions (a comma separated list of elements, surrounded by square - brackets ([])). For example: "[ i32 42, i32 11, i32 74 - ]". Array constants must have array type, and - the number and types of elements must match those specified by the - type.
- -
Vector constants
-
Vector constants are represented with notation similar to vector type - definitions (a comma separated list of elements, surrounded by - less-than/greater-than's (<>)). For example: "< i32 - 42, i32 11, i32 74, i32 100 >". Vector constants must - have vector type, and the number and types of - elements must match those specified by the type.
- -
Zero initialization
-
The string 'zeroinitializer' can be used to zero initialize a - value of any type, including scalar and - aggregate types. - This is often used to avoid having to print large zero initializers - (e.g. for large arrays) and is always exactly equivalent to using explicit - zero initializers.
- -
Metadata node
-
A metadata node is a structure-like constant with - metadata type. For example: "metadata !{ - i32 0, metadata !"test" }". Unlike other constants that are meant to - be interpreted as part of the instruction stream, metadata is a place to - attach additional information such as debug info.
-
- -
- - -

- Global Variable and Function Addresses -

- -
- -

The addresses of global variables - and functions are always implicitly valid - (link-time) constants. These constants are explicitly referenced when - the identifier for the global is used and always - have pointer type. For example, the following is a - legal LLVM file:

- -
-@X = global i32 17
-@Y = global i32 42
-@Z = global [2 x i32*] [ i32* @X, i32* @Y ]
-
- -
- - -

- Undefined Values -

- -
- -

The string 'undef' can be used anywhere a constant is expected, and - indicates that the user of the value may receive an unspecified bit-pattern. - Undefined values may be of any type (other than 'label' - or 'void') and be used anywhere a constant is permitted.

- -

Undefined values are useful because they indicate to the compiler that the - program is well defined no matter what value is used. This gives the - compiler more freedom to optimize. Here are some examples of (potentially - surprising) transformations that are valid (in pseudo IR):

- - -
-  %A = add %X, undef
-  %B = sub %X, undef
-  %C = xor %X, undef
-Safe:
-  %A = undef
-  %B = undef
-  %C = undef
-
- -

This is safe because all of the output bits are affected by the undef bits. - Any output bit can have a zero or one depending on the input bits.

- -
-  %A = or %X, undef
-  %B = and %X, undef
-Safe:
-  %A = -1
-  %B = 0
-Unsafe:
-  %A = undef
-  %B = undef
-
- -

These logical operations have bits that are not always affected by the input. - For example, if %X has a zero bit, then the output of the - 'and' operation will always be a zero for that bit, no matter what - the corresponding bit from the 'undef' is. As such, it is unsafe to - optimize or assume that the result of the 'and' is 'undef'. - However, it is safe to assume that all bits of the 'undef' could be - 0, and optimize the 'and' to 0. Likewise, it is safe to assume that - all the bits of the 'undef' operand to the 'or' could be - set, allowing the 'or' to be folded to -1.

- -
-  %A = select undef, %X, %Y
-  %B = select undef, 42, %Y
-  %C = select %X, %Y, undef
-Safe:
-  %A = %X     (or %Y)
-  %B = 42     (or %Y)
-  %C = %Y
-Unsafe:
-  %A = undef
-  %B = undef
-  %C = undef
-
- -

This set of examples shows that undefined 'select' (and conditional - branch) conditions can go either way, but they have to come from one - of the two operands. In the %A example, if %X and - %Y were both known to have a clear low bit, then %A would - have to have a cleared low bit. However, in the %C example, the - optimizer is allowed to assume that the 'undef' operand could be the - same as %Y, allowing the whole 'select' to be - eliminated.

- -
-  %A = xor undef, undef
-
-  %B = undef
-  %C = xor %B, %B
-
-  %D = undef
-  %E = icmp lt %D, 4
-  %F = icmp gte %D, 4
-
-Safe:
-  %A = undef
-  %B = undef
-  %C = undef
-  %D = undef
-  %E = undef
-  %F = undef
-
- -

This example points out that two 'undef' operands are not - necessarily the same. This can be surprising to people (and also matches C - semantics) where they assume that "X^X" is always zero, even - if X is undefined. This isn't true for a number of reasons, but the - short answer is that an 'undef' "variable" can arbitrarily change - its value over its "live range". This is true because the variable doesn't - actually have a live range. Instead, the value is logically read - from arbitrary registers that happen to be around when needed, so the value - is not necessarily consistent over time. In fact, %A and %C - need to have the same semantics or the core LLVM "replace all uses with" - concept would not hold.

- -
-  %A = fdiv undef, %X
-  %B = fdiv %X, undef
-Safe:
-  %A = undef
-b: unreachable
-
- -

These examples show the crucial difference between an undefined - value and undefined behavior. An undefined value (like - 'undef') is allowed to have an arbitrary bit-pattern. This means that - the %A operation can be constant folded to 'undef', because - the 'undef' could be an SNaN, and fdiv is not (currently) - defined on SNaN's. However, in the second example, we can make a more - aggressive assumption: because the undef is allowed to be an - arbitrary value, we are allowed to assume that it could be zero. Since a - divide by zero has undefined behavior, we are allowed to assume that - the operation does not execute at all. This allows us to delete the divide and - all code after it. Because the undefined operation "can't happen", the - optimizer can assume that it occurs in dead code.

- -
-a:  store undef -> %X
-b:  store %X -> undef
-Safe:
-a: <deleted>
-b: unreachable
-
- -

These examples reiterate the fdiv example: a store of an - undefined value can be assumed to not have any effect; we can assume that the - value is overwritten with bits that happen to match what was already there. - However, a store to an undefined location could clobber arbitrary - memory, therefore, it has undefined behavior.

- -
- - -

- Poison Values -

- -
- -

Poison values are similar to undef values, however - they also represent the fact that an instruction or constant expression which - cannot evoke side effects has nevertheless detected a condition which results - in undefined behavior.

- -

There is currently no way of representing a poison value in the IR; they - only exist when produced by operations such as - add with the nsw flag.

- -

Poison value behavior is defined in terms of value dependence:

- -
    -
  • Values other than phi nodes depend on - their operands.
  • - -
  • Phi nodes depend on the operand corresponding - to their dynamic predecessor basic block.
  • - -
  • Function arguments depend on the corresponding actual argument values in - the dynamic callers of their functions.
  • - -
  • Call instructions depend on the - ret instructions that dynamically transfer - control back to them.
  • - -
  • Invoke instructions depend on the - ret, resume, - or exception-throwing call instructions that dynamically transfer control - back to them.
  • - -
  • Non-volatile loads and stores depend on the most recent stores to all of the - referenced memory addresses, following the order in the IR - (including loads and stores implied by intrinsics such as - @llvm.memcpy.)
  • - - - - - -
  • An instruction with externally visible side effects depends on the most - recent preceding instruction with externally visible side effects, following - the order in the IR. (This includes - volatile operations.)
  • - -
  • An instruction control-depends on a - terminator instruction - if the terminator instruction has multiple successors and the instruction - is always executed when control transfers to one of the successors, and - may not be executed when control is transferred to another.
  • - -
  • Additionally, an instruction also control-depends on a terminator - instruction if the set of instructions it otherwise depends on would be - different if the terminator had transferred control to a different - successor.
  • - -
  • Dependence is transitive.
  • - -
- -

Poison Values have the same behavior as undef values, - with the additional effect that any instruction which has a dependence - on a poison value has undefined behavior.

- -

Here are some examples:

- -
-entry:
-  %poison = sub nuw i32 0, 1           ; Results in a poison value.
-  %still_poison = and i32 %poison, 0   ; 0, but also poison.
-  %poison_yet_again = getelementptr i32* @h, i32 %still_poison
-  store i32 0, i32* %poison_yet_again  ; memory at @h[0] is poisoned
-
-  store i32 %poison, i32* @g           ; Poison value stored to memory.
-  %poison2 = load i32* @g              ; Poison value loaded back from memory.
-
-  store volatile i32 %poison, i32* @g  ; External observation; undefined behavior.
-
-  %narrowaddr = bitcast i32* @g to i16*
-  %wideaddr = bitcast i32* @g to i64*
-  %poison3 = load i16* %narrowaddr     ; Returns a poison value.
-  %poison4 = load i64* %wideaddr       ; Returns a poison value.
-
-  %cmp = icmp slt i32 %poison, 0       ; Returns a poison value.
-  br i1 %cmp, label %true, label %end  ; Branch to either destination.
-
-true:
-  store volatile i32 0, i32* @g        ; This is control-dependent on %cmp, so
-                                       ; it has undefined behavior.
-  br label %end
-
-end:
-  %p = phi i32 [ 0, %entry ], [ 1, %true ]
-                                       ; Both edges into this PHI are
-                                       ; control-dependent on %cmp, so this
-                                       ; always results in a poison value.
-
-  store volatile i32 0, i32* @g        ; This would depend on the store in %true
-                                       ; if %cmp is true, or the store in %entry
-                                       ; otherwise, so this is undefined behavior.
-
-  br i1 %cmp, label %second_true, label %second_end
-                                       ; The same branch again, but this time the
-                                       ; true block doesn't have side effects.
-
-second_true:
-  ; No side effects!
-  ret void
-
-second_end:
-  store volatile i32 0, i32* @g        ; This time, the instruction always depends
-                                       ; on the store in %end. Also, it is
-                                       ; control-equivalent to %end, so this is
-                                       ; well-defined (ignoring earlier undefined
-                                       ; behavior in this example).
-
- -
- - -

- Addresses of Basic Blocks -

- -
- -

blockaddress(@function, %block)

- -

The 'blockaddress' constant computes the address of the specified - basic block in the specified function, and always has an i8* type. Taking - the address of the entry block is illegal.

- -

This value only has defined behavior when used as an operand to the - 'indirectbr' instruction, or for - comparisons against null. Pointer equality tests between label addresses - result in undefined behavior — though, again, comparison against null - is ok, and no label is equal to the null pointer. This may be passed around - as an opaque pointer sized value as long as the bits are not inspected. This - allows ptrtoint and arithmetic to be performed on these values so - long as the original value is reconstituted before the indirectbr - instruction.

- -

Finally, some targets may provide defined semantics when using the value as - the operand to an inline assembly, but that is target specific.

- -
- - - -

- Constant Expressions -

- -
- -

Constant expressions are used to allow expressions involving other constants - to be used as constants. Constant expressions may be of - any first class type and may involve any LLVM - operation that does not have side effects (e.g. load and call are not - supported). The following is the syntax for constant expressions:

- -
-
trunc (CST to TYPE)
-
Truncate a constant to another type. The bit size of CST must be larger - than the bit size of TYPE. Both types must be integers.
- -
zext (CST to TYPE)
-
Zero extend a constant to another type. The bit size of CST must be - smaller than the bit size of TYPE. Both types must be integers.
- -
sext (CST to TYPE)
-
Sign extend a constant to another type. The bit size of CST must be - smaller than the bit size of TYPE. Both types must be integers.
- -
fptrunc (CST to TYPE)
-
Truncate a floating point constant to another floating point type. The - size of CST must be larger than the size of TYPE. Both types must be - floating point.
- -
fpext (CST to TYPE)
-
Floating point extend a constant to another type. The size of CST must be - smaller or equal to the size of TYPE. Both types must be floating - point.
- -
fptoui (CST to TYPE)
-
Convert a floating point constant to the corresponding unsigned integer - constant. TYPE must be a scalar or vector integer type. CST must be of - scalar or vector floating point type. Both CST and TYPE must be scalars, - or vectors of the same number of elements. If the value won't fit in the - integer type, the results are undefined.
- -
fptosi (CST to TYPE)
-
Convert a floating point constant to the corresponding signed integer - constant. TYPE must be a scalar or vector integer type. CST must be of - scalar or vector floating point type. Both CST and TYPE must be scalars, - or vectors of the same number of elements. If the value won't fit in the - integer type, the results are undefined.
- -
uitofp (CST to TYPE)
-
Convert an unsigned integer constant to the corresponding floating point - constant. TYPE must be a scalar or vector floating point type. CST must be - of scalar or vector integer type. Both CST and TYPE must be scalars, or - vectors of the same number of elements. If the value won't fit in the - floating point type, the results are undefined.
- -
sitofp (CST to TYPE)
-
Convert a signed integer constant to the corresponding floating point - constant. TYPE must be a scalar or vector floating point type. CST must be - of scalar or vector integer type. Both CST and TYPE must be scalars, or - vectors of the same number of elements. If the value won't fit in the - floating point type, the results are undefined.
- -
ptrtoint (CST to TYPE)
-
Convert a pointer typed constant to the corresponding integer constant. - TYPE must be an integer type. CST must be of pointer - type. The CST value is zero extended, truncated, or unchanged to - make it fit in TYPE.
- -
inttoptr (CST to TYPE)
-
Convert an integer constant to a pointer constant. TYPE must be a pointer - type. CST must be of integer type. The CST value is zero extended, - truncated, or unchanged to make it fit in a pointer size. This one is - really dangerous!
- -
bitcast (CST to TYPE)
-
Convert a constant, CST, to another TYPE. The constraints of the operands - are the same as those for the bitcast - instruction.
- -
getelementptr (CSTPTR, IDX0, IDX1, ...)
-
getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)
-
Perform the getelementptr operation on - constants. As with the getelementptr - instruction, the index list may have zero or more indexes, which are - required to make sense for the type of "CSTPTR".
- -
select (COND, VAL1, VAL2)
-
Perform the select operation on constants.
- -
icmp COND (VAL1, VAL2)
-
Performs the icmp operation on constants.
- -
fcmp COND (VAL1, VAL2)
-
Performs the fcmp operation on constants.
- -
extractelement (VAL, IDX)
-
Perform the extractelement operation on - constants.
- -
insertelement (VAL, ELT, IDX)
-
Perform the insertelement operation on - constants.
- -
shufflevector (VEC1, VEC2, IDXMASK)
-
Perform the shufflevector operation on - constants.
- -
extractvalue (VAL, IDX0, IDX1, ...)
-
Perform the extractvalue operation on - constants. The index list is interpreted in a similar manner as indices in - a 'getelementptr' operation. At least one - index value must be specified.
- -
insertvalue (VAL, ELT, IDX0, IDX1, ...)
-
Perform the insertvalue operation on - constants. The index list is interpreted in a similar manner as indices in - a 'getelementptr' operation. At least one - index value must be specified.
- -
OPCODE (LHS, RHS)
-
Perform the specified operation of the LHS and RHS constants. OPCODE may - be any of the binary - or bitwise binary operations. The constraints - on operands are the same as those for the corresponding instruction - (e.g. no bitwise operations on floating point values are allowed).
-
- -
- -
- - -

Other Values

- -
- -

-Inline Assembler Expressions -

- -
- -

LLVM supports inline assembler expressions (as opposed - to Module-Level Inline Assembly) through the use of - a special value. This value represents the inline assembler as a string - (containing the instructions to emit), a list of operand constraints (stored - as a string), a flag that indicates whether or not the inline asm - expression has side effects, and a flag indicating whether the function - containing the asm needs to align its stack conservatively. An example - inline assembler expression is:

- -
-i32 (i32) asm "bswap $0", "=r,r"
-
- -

Inline assembler expressions may only be used as the callee operand of - a call or an - invoke instruction. - Thus, typically we have:

- -
-%X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
-
- -

Inline asms with side effects not visible in the constraint list must be - marked as having side effects. This is done through the use of the - 'sideeffect' keyword, like so:

- -
-call void asm sideeffect "eieio", ""()
-
- -

In some cases inline asms will contain code that will not work unless the - stack is aligned in some way, such as calls or SSE instructions on x86, - yet will not contain code that does that alignment within the asm. - The compiler should make conservative assumptions about what the asm might - contain and should generate its usual stack alignment code in the prologue - if the 'alignstack' keyword is present:

- -
-call void asm alignstack "eieio", ""()
-
- -

Inline asms also support using non-standard assembly dialects. The assumed - dialect is ATT. When the 'inteldialect' keyword is present, the - inline asm is using the Intel dialect. Currently, ATT and Intel are the - only supported dialects. An example is:

- -
-call void asm inteldialect "eieio", ""()
-
- -

If multiple keywords appear the 'sideeffect' keyword must come - first, the 'alignstack' keyword second and the - 'inteldialect' keyword last.

- - - - -

- Inline Asm Metadata -

- -
- -

The call instructions that wrap inline asm nodes may have a - "!srcloc" MDNode attached to them that contains a list of constant - integers. If present, the code generator will use the integer as the - location cookie value when reporting errors through the LLVMContext - error reporting mechanisms. This allows a front-end to correlate backend - errors that occur with inline asm back to the source code that produced it. - For example:

- -
-call void asm sideeffect "something bad", ""(), !srcloc !42
-...
-!42 = !{ i32 1234567 }
-
- -

It is up to the front-end to make sense of the magic numbers it places in the - IR. If the MDNode contains multiple constants, the code generator will use - the one that corresponds to the line of the asm that the error occurs on.

- -
- -
- - -

- Metadata Nodes and Metadata Strings -

- -
- -

LLVM IR allows metadata to be attached to instructions in the program that - can convey extra information about the code to the optimizers and code - generator. One example application of metadata is source-level debug - information. There are two metadata primitives: strings and nodes. All - metadata has the metadata type and is identified in syntax by a - preceding exclamation point ('!').

- -

A metadata string is a string surrounded by double quotes. It can contain - any character by escaping non-printable characters with "\xx" where - "xx" is the two digit hex code. For example: - "!"test\00"".

- -

Metadata nodes are represented with notation similar to structure constants - (a comma separated list of elements, surrounded by braces and preceded by an - exclamation point). Metadata nodes can have any values as their operand. For - example:

- -
-
-!{ metadata !"test\00", i32 10}
-
-
- -

A named metadata is a collection of - metadata nodes, which can be looked up in the module symbol table. For - example:

- -
-
-!foo =  metadata !{!4, !3}
-
-
- -

Metadata can be used as function arguments. Here llvm.dbg.value - function is using two metadata arguments:

- -
-
-call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
-
-
- -

Metadata can be attached with an instruction. Here metadata !21 is - attached to the add instruction using the !dbg - identifier:

- -
-
-%indvar.next = add i64 %indvar, 1, !dbg !21
-
-
- -

More information about specific metadata nodes recognized by the optimizers - and code generator is found below.

- - -

- 'tbaa' Metadata -

- -
- -

In LLVM IR, memory does not have types, so LLVM's own type system is not - suitable for doing TBAA. Instead, metadata is added to the IR to describe - a type system of a higher level language. This can be used to implement - typical C/C++ TBAA, but it can also be used to implement custom alias - analysis behavior for other languages.

- -

The current metadata format is very simple. TBAA metadata nodes have up to - three fields, e.g.:

- -
-
-!0 = metadata !{ metadata !"an example type tree" }
-!1 = metadata !{ metadata !"int", metadata !0 }
-!2 = metadata !{ metadata !"float", metadata !0 }
-!3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
-
-
- -

The first field is an identity field. It can be any value, usually - a metadata string, which uniquely identifies the type. The most important - name in the tree is the name of the root node. Two trees with - different root node names are entirely disjoint, even if they - have leaves with common names.

- -

The second field identifies the type's parent node in the tree, or - is null or omitted for a root node. A type is considered to alias - all of its descendants and all of its ancestors in the tree. Also, - a type is considered to alias all types in other trees, so that - bitcode produced from multiple front-ends is handled conservatively.

- -

If the third field is present, it's an integer which if equal to 1 - indicates that the type is "constant" (meaning - pointsToConstantMemory should return true; see - other useful - AliasAnalysis methods).

- -
- - -

- 'tbaa.struct' Metadata -

- -
- -

The llvm.memcpy is often used to implement -aggregate assignment operations in C and similar languages, however it is -defined to copy a contiguous region of memory, which is more than strictly -necessary for aggregate types which contain holes due to padding. Also, it -doesn't contain any TBAA information about the fields of the aggregate.

- -

!tbaa.struct metadata can describe which memory subregions in a memcpy -are padding and what the TBAA tags of the struct are.

- -

The current metadata format is very simple. !tbaa.struct metadata nodes - are a list of operands which are in conceptual groups of three. For each - group of three, the first operand gives the offset of a field in bytes, - the second gives its size in bytes, and the third gives its - tbaa tag. e.g.:

- -
-
-!4 = metadata !{ i64 0, i64 4, metadata !1, i64 8, i64 4, metadata !2 }
-
-
- -

This describes a struct with two fields. The first is at offset 0 bytes - with size 4 bytes, and has tbaa tag !1. The second is at offset 8 bytes - and has size 4 bytes and has tbaa tag !2.

- -

Note that the fields need not be contiguous. In this example, there is a - 4 byte gap between the two fields. This gap represents padding which - does not carry useful data and need not be preserved.

- -
- - -

- 'fpmath' Metadata -

- -
- -

fpmath metadata may be attached to any instruction of floating point - type. It can be used to express the maximum acceptable error in the result of - that instruction, in ULPs, thus potentially allowing the compiler to use a - more efficient but less accurate method of computing it. ULP is defined as - follows:

- -
- -

If x is a real number that lies between two finite consecutive - floating-point numbers a and b, without being equal to one - of them, then ulp(x) = |b - a|, otherwise ulp(x) is the - distance between the two non-equal finite floating-point numbers nearest - x. Moreover, ulp(NaN) is NaN.

- -
- -

The metadata node shall consist of a single positive floating point number - representing the maximum relative error, for example:

- -
-
-!0 = metadata !{ float 2.5 } ; maximum acceptable inaccuracy is 2.5 ULPs
-
-
- -
- - -

- 'range' Metadata -

- -
-

range metadata may be attached only to loads of integer types. It - expresses the possible ranges the loaded value is in. The ranges are - represented with a flattened list of integers. The loaded value is known to - be in the union of the ranges defined by each consecutive pair. Each pair - has the following properties:

-
    -
  • The type must match the type loaded by the instruction.
  • -
  • The pair a,b represents the range [a,b).
  • -
  • Both a and b are constants.
  • -
  • The range is allowed to wrap.
  • -
  • The range should not represent the full or empty set. That is, - a!=b.
  • -
-

In addition, the pairs must be in signed order of the lower bound and - they must be non-contiguous.

- -

Examples:

-
-
-  %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1
-  %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1
-  %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5
-  %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5
-...
-!0 = metadata !{ i8 0, i8 2 }
-!1 = metadata !{ i8 255, i8 2 }
-!2 = metadata !{ i8 0, i8 2, i8 3, i8 6 }
-!3 = metadata !{ i8 -2, i8 0, i8 3, i8 6 }
-
-
-
-
- -
- - -

- Module Flags Metadata -

- - -
- -

Information about the module as a whole is difficult to convey to LLVM's - subsystems. The LLVM IR isn't sufficient to transmit this - information. The llvm.module.flags named metadata exists in order to - facilitate this. These flags are in the form of key / value pairs — - much like a dictionary — making it easy for any subsystem who cares - about a flag to look it up.

- -

The llvm.module.flags metadata contains a list of metadata - triplets. Each triplet has the following form:

- -
    -
  • The first element is a behavior flag, which specifies the behavior - when two (or more) modules are merged together, and it encounters two (or - more) metadata with the same ID. The supported behaviors are described - below.
  • - -
  • The second element is a metadata string that is a unique ID for the - metadata. How each ID is interpreted is documented below.
  • - -
  • The third element is the value of the flag.
  • -
- -

When two (or more) modules are merged together, the resulting - llvm.module.flags metadata is the union of the - modules' llvm.module.flags metadata. The only exception being a flag - with the Override behavior, which may override another flag's value - (see below).

- -

The following behaviors are supported:

- - - - - - - - - - - - - - - - - - - - - - - - -
ValueBehavior
1 -
-
Error
-
Emits an error if two values disagree. It is an error to have an ID - with both an Error and a Warning behavior.
-
-
2 -
-
Warning
-
Emits a warning if two values disagree.
-
-
3 -
-
Require
-
Emits an error when the specified value is not present or doesn't - have the specified value. It is an error for two (or more) - llvm.module.flags with the same ID to have the Require - behavior but different values. There may be multiple Require flags - per ID.
-
-
4 -
-
Override
-
Uses the specified value if the two values disagree. It is an - error for two (or more) llvm.module.flags with the same - ID to have the Override behavior but different values.
-
-
- -

An example of module flags:

- -
-!0 = metadata !{ i32 1, metadata !"foo", i32 1 }
-!1 = metadata !{ i32 4, metadata !"bar", i32 37 }
-!2 = metadata !{ i32 2, metadata !"qux", i32 42 }
-!3 = metadata !{ i32 3, metadata !"qux",
-  metadata !{
-    metadata !"foo", i32 1
-  }
-}
-!llvm.module.flags = !{ !0, !1, !2, !3 }
-
- -
    -
  • Metadata !0 has the ID !"foo" and the value '1'. The - behavior if two or more !"foo" flags are seen is to emit an - error if their values are not equal.

  • - -
  • Metadata !1 has the ID !"bar" and the value '37'. The - behavior if two or more !"bar" flags are seen is to use the - value '37' if their values are not equal.

  • - -
  • Metadata !2 has the ID !"qux" and the value '42'. The - behavior if two or more !"qux" flags are seen is to emit a - warning if their values are not equal.

  • - -
  • Metadata !3 has the ID !"qux" and the value:

    - -
    -metadata !{ metadata !"foo", i32 1 }
    -
    - -

    The behavior is to emit an error if the llvm.module.flags does - not contain a flag with the ID !"foo" that has the value - '1'. If two or more !"qux" flags exist, then they must have - the same value or an error will be issued.

  • -
- - - -

-Objective-C Garbage Collection Module Flags Metadata -

- -
- -

On the Mach-O platform, Objective-C stores metadata about garbage collection - in a special section called "image info". The metadata consists of a version - number and a bitmask specifying what types of garbage collection are - supported (if any) by the file. If two or more modules are linked together - their garbage collection metadata needs to be merged rather than appended - together.

- -

The Objective-C garbage collection module flags metadata consists of the - following key-value pairs:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
KeyValue
Objective-C Version[Required] — The Objective-C ABI - version. Valid values are 1 and 2.
Objective-C Image Info Version[Required] — The version of the image info - section. Currently always 0.
Objective-C Image Info Section[Required] — The section to place the - metadata. Valid values are "__OBJC, __image_info, regular" for - Objective-C ABI version 1, and "__DATA,__objc_imageinfo, regular, - no_dead_strip" for Objective-C ABI version 2.
Objective-C Garbage Collection[Required] — Specifies whether garbage - collection is supported or not. Valid values are 0, for no garbage - collection, and 2, for garbage collection supported.
Objective-C GC Only[Optional] — Specifies that only garbage - collection is supported. If present, its value must be 6. This flag - requires that the Objective-C Garbage Collection flag have the - value 2.
- -

Some important flag interactions:

- -
    -
  • If a module with Objective-C Garbage Collection set to 0 is - merged with a module with Objective-C Garbage Collection set to - 2, then the resulting module has the Objective-C Garbage - Collection flag set to 0.
  • - -
  • A module with Objective-C Garbage Collection set to 0 cannot be - merged with a module with Objective-C GC Only set to 6.
  • -
- -
- -
- - -

- Intrinsic Global Variables -

- -
-

LLVM has a number of "magic" global variables that contain data that affect -code generation or other IR semantics. These are documented here. All globals -of this sort should have a section specified as "llvm.metadata". This -section and all globals that start with "llvm." are reserved for use -by LLVM.

- - -

-The 'llvm.used' Global Variable -

- -
- -

The @llvm.used global is an array with i8* element type which has appending linkage. This array contains a list of -pointers to global variables and functions which may optionally have a pointer -cast formed of bitcast or getelementptr. For example, a legal use of it is:

- -
-
-@X = global i8 4
-@Y = global i32 123
-
-@llvm.used = appending global [2 x i8*] [
-   i8* @X,
-   i8* bitcast (i32* @Y to i8*)
-], section "llvm.metadata"
-
-
- -

If a global variable appears in the @llvm.used list, then the - compiler, assembler, and linker are required to treat the symbol as if there - is a reference to the global that it cannot see. For example, if a variable - has internal linkage and no references other than that from - the @llvm.used list, it cannot be deleted. This is commonly used to - represent references from inline asms and other things the compiler cannot - "see", and corresponds to "attribute((used))" in GNU C.

- -

On some targets, the code generator must emit a directive to the assembler or - object file to prevent the assembler and linker from molesting the - symbol.

- -
- - -

- - The 'llvm.compiler.used' Global Variable - -

- -
- -

The @llvm.compiler.used directive is the same as the - @llvm.used directive, except that it only prevents the compiler from - touching the symbol. On targets that support it, this allows an intelligent - linker to optimize references to the symbol without being impeded as it would - be by @llvm.used.

- -

This is a rare construct that should only be used in rare circumstances, and - should not be exposed to source languages.

- -
- - -

-The 'llvm.global_ctors' Global Variable -

- -
- -
-
-%0 = type { i32, void ()* }
-@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
-
-
- -

The @llvm.global_ctors array contains a list of constructor - functions and associated priorities. The functions referenced by this array - will be called in ascending order of priority (i.e. lowest first) when the - module is loaded. The order of functions with the same priority is not - defined.

- -
- - -

-The 'llvm.global_dtors' Global Variable -

- -
- -
-
-%0 = type { i32, void ()* }
-@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
-
-
- -

The @llvm.global_dtors array contains a list of destructor functions - and associated priorities. The functions referenced by this array will be - called in descending order of priority (i.e. highest first) when the module - is unloaded. The order of functions with the same priority is not defined.

- -
- -
- - -

Instruction Reference

- - -
- -

The LLVM instruction set consists of several different classifications of - instructions: terminator - instructions, binary instructions, - bitwise binary instructions, - memory instructions, and - other instructions.

- - -

- Terminator Instructions -

- -
- -

As mentioned previously, every basic block - in a program ends with a "Terminator" instruction, which indicates which - block should be executed after the current block is finished. These - terminator instructions typically yield a 'void' value: they produce - control flow, not values (the one exception being the - 'invoke' instruction).

- -

The terminator instructions are: - 'ret', - 'br', - 'switch', - 'indirectbr', - 'invoke', - 'resume', and - 'unreachable'.

- - -

- 'ret' Instruction -

- -
- -
Syntax:
-
-  ret <type> <value>       ; Return a value from a non-void function
-  ret void                 ; Return from void function
-
- -
Overview:
-

The 'ret' instruction is used to return control flow (and optionally - a value) from a function back to the caller.

- -

There are two forms of the 'ret' instruction: one that returns a - value and then causes control flow, and one that just causes control flow to - occur.

- -
Arguments:
-

The 'ret' instruction optionally accepts a single argument, the - return value. The type of the return value must be a - 'first class' type.

- -

A function is not well formed if it has a - non-void return type and contains a 'ret' instruction with no return - value or a return value with a type that does not match its type, or if it - has a void return type and contains a 'ret' instruction with a - return value.

- -
Semantics:
-

When the 'ret' instruction is executed, control flow returns back to - the calling function's context. If the caller is a - "call" instruction, execution continues at the - instruction after the call. If the caller was an - "invoke" instruction, execution continues at - the beginning of the "normal" destination block. If the instruction returns - a value, that value shall set the call or invoke instruction's return - value.

- -
Example:
-
-  ret i32 5                       ; Return an integer value of 5
-  ret void                        ; Return from a void function
-  ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2
-
- -
- -

- 'br' Instruction -

- -
- -
Syntax:
-
-  br i1 <cond>, label <iftrue>, label <iffalse>
-  br label <dest>          ; Unconditional branch
-
- -
Overview:
-

The 'br' instruction is used to cause control flow to transfer to a - different basic block in the current function. There are two forms of this - instruction, corresponding to a conditional branch and an unconditional - branch.

- -
Arguments:
-

The conditional branch form of the 'br' instruction takes a single - 'i1' value and two 'label' values. The unconditional form - of the 'br' instruction takes a single 'label' value as a - target.

- -
Semantics:
-

Upon execution of a conditional 'br' instruction, the 'i1' - argument is evaluated. If the value is true, control flows to the - 'iftrue' label argument. If "cond" is false, - control flows to the 'iffalse' label argument.

- -
Example:
-
-Test:
-  %cond = icmp eq i32 %a, %b
-  br i1 %cond, label %IfEqual, label %IfUnequal
-IfEqual:
-  ret i32 1
-IfUnequal:
-  ret i32 0
-
- -
- - -

- 'switch' Instruction -

- -
- -
Syntax:
-
-  switch <intty> <value>, label <defaultdest> [ <intty> <val>, label <dest> ... ]
-
- -
Overview:
-

The 'switch' instruction is used to transfer control flow to one of - several different places. It is a generalization of the 'br' - instruction, allowing a branch to occur to one of many possible - destinations.

- -
Arguments:
-

The 'switch' instruction uses three parameters: an integer - comparison value 'value', a default 'label' destination, - and an array of pairs of comparison value constants and 'label's. - The table is not allowed to contain duplicate constant entries.

- -
Semantics:
-

The switch instruction specifies a table of values and - destinations. When the 'switch' instruction is executed, this table - is searched for the given value. If the value is found, control flow is - transferred to the corresponding destination; otherwise, control flow is - transferred to the default destination.

- -
Implementation:
-

Depending on properties of the target machine and the particular - switch instruction, this instruction may be code generated in - different ways. For example, it could be generated as a series of chained - conditional branches or with a lookup table.

- -
Example:
-
- ; Emulate a conditional br instruction
- %Val = zext i1 %value to i32
- switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
-
- ; Emulate an unconditional br instruction
- switch i32 0, label %dest [ ]
-
- ; Implement a jump table:
- switch i32 %val, label %otherwise [ i32 0, label %onzero
-                                     i32 1, label %onone
-                                     i32 2, label %ontwo ]
-
- -
- - - -

- 'indirectbr' Instruction -

- -
- -
Syntax:
-
-  indirectbr <somety>* <address>, [ label <dest1>, label <dest2>, ... ]
-
- -
Overview:
- -

The 'indirectbr' instruction implements an indirect branch to a label - within the current function, whose address is specified by - "address". Address must be derived from a blockaddress constant.

- -
Arguments:
- -

The 'address' argument is the address of the label to jump to. The - rest of the arguments indicate the full set of possible destinations that the - address may point to. Blocks are allowed to occur multiple times in the - destination list, though this isn't particularly useful.

- -

This destination list is required so that dataflow analysis has an accurate - understanding of the CFG.

- -
Semantics:
- -

Control transfers to the block specified in the address argument. All - possible destination blocks must be listed in the label list, otherwise this - instruction has undefined behavior. This implies that jumps to labels - defined in other functions have undefined behavior as well.

- -
Implementation:
- -

This is typically implemented with a jump through a register.

- -
Example:
-
- indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
-
- -
- - - -

- 'invoke' Instruction -

- -
- -
Syntax:
-
-  <result> = invoke [cconv] [ret attrs] <ptr to function ty> <function ptr val>(<function args>) [fn attrs]
-                to label <normal label> unwind label <exception label>
-
- -
Overview:
-

The 'invoke' instruction causes control to transfer to a specified - function, with the possibility of control flow transfer to either the - 'normal' label or the 'exception' label. If the callee - function returns with the "ret" instruction, - control flow will return to the "normal" label. If the callee (or any - indirect callees) returns via the "resume" - instruction or other exception handling mechanism, control is interrupted and - continued at the dynamically nearest "exception" label.

- -

The 'exception' label is a - landing pad for the - exception. As such, 'exception' label is required to have the - "landingpad" instruction, which contains - the information about the behavior of the program after unwinding - happens, as its first non-PHI instruction. The restrictions on the - "landingpad" instruction tightly couple it to the - "invoke" instruction, so that the important information contained - within the "landingpad" instruction can't be lost through normal - code motion.

- -
Arguments:
-

This instruction requires several arguments:

- -
    -
  1. The optional "cconv" marker indicates which calling - convention the call should use. If none is specified, the call - defaults to using C calling conventions.
  2. - -
  3. The optional Parameter Attributes list for - return values. Only 'zeroext', 'signext', and - 'inreg' attributes are valid here.
  4. - -
  5. 'ptr to function ty': shall be the signature of the pointer to - function value being invoked. In most cases, this is a direct function - invocation, but indirect invokes are just as possible, branching - off an arbitrary pointer to function value.
  6. - -
  7. 'function ptr val': An LLVM value containing a pointer to a - function to be invoked.
  8. - -
  9. 'function args': argument list whose types match the function - signature argument types and parameter attributes. All arguments must be - of first class type. If the function - signature indicates the function accepts a variable number of arguments, - the extra arguments can be specified.
  10. - -
  11. 'normal label': the label reached when the called function - executes a 'ret' instruction.
  12. - -
  13. 'exception label': the label reached when a callee returns via - the resume instruction or other exception - handling mechanism.
  14. - -
  15. The optional function attributes list. Only - 'noreturn', 'nounwind', 'readonly' and - 'readnone' attributes are valid here.
  16. -
- -
Semantics:
-

This instruction is designed to operate as a standard - 'call' instruction in most regards. The - primary difference is that it establishes an association with a label, which - is used by the runtime library to unwind the stack.

- -

This instruction is used in languages with destructors to ensure that proper - cleanup is performed in the case of either a longjmp or a thrown - exception. Additionally, this is important for implementation of - 'catch' clauses in high-level languages that support them.

- -

For the purposes of the SSA form, the definition of the value returned by the - 'invoke' instruction is deemed to occur on the edge from the current - block to the "normal" label. If the callee unwinds then no return value is - available.

- -
Example:
-
-  %retval = invoke i32 @Test(i32 15) to label %Continue
-              unwind label %TestCleanup              ; {i32}:retval set
-  %retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue
-              unwind label %TestCleanup              ; {i32}:retval set
-
- -
- - - -

- 'resume' Instruction -

- -
- -
Syntax:
-
-  resume <type> <value>
-
- -
Overview:
-

The 'resume' instruction is a terminator instruction that has no - successors.

- -
Arguments:
-

The 'resume' instruction requires one argument, which must have the - same type as the result of any 'landingpad' instruction in the same - function.

- -
Semantics:
-

The 'resume' instruction resumes propagation of an existing - (in-flight) exception whose unwinding was interrupted with - a landingpad instruction.

- -
Example:
-
-  resume { i8*, i32 } %exn
-
- -
- - - -

- 'unreachable' Instruction -

- -
- -
Syntax:
-
-  unreachable
-
- -
Overview:
-

The 'unreachable' instruction has no defined semantics. This - instruction is used to inform the optimizer that a particular portion of the - code is not reachable. This can be used to indicate that the code after a - no-return function cannot be reached, and other facts.

- -
Semantics:
-

The 'unreachable' instruction has no defined semantics.

- -
- -
- - -

- Binary Operations -

- -
- -

Binary operators are used to do most of the computation in a program. They - require two operands of the same type, execute an operation on them, and - produce a single value. The operands might represent multiple data, as is - the case with the vector data type. The result value - has the same type as its operands.

- -

There are several different binary operators:

- - -

- 'add' Instruction -

- -
- -
Syntax:
-
-  <result> = add <ty> <op1>, <op2>          ; yields {ty}:result
-  <result> = add nuw <ty> <op1>, <op2>      ; yields {ty}:result
-  <result> = add nsw <ty> <op1>, <op2>      ; yields {ty}:result
-  <result> = add nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
-
- -
Overview:
-

The 'add' instruction returns the sum of its two operands.

- -
Arguments:
-

The two arguments to the 'add' instruction must - be integer or vector of - integer values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the integer sum of the two operands.

- -

If the sum has unsigned overflow, the result returned is the mathematical - result modulo 2^n, where n is the bit width of the result.

- -

Because LLVM integers use a two's complement representation, this instruction - is appropriate for both signed and unsigned integers.

- -

nuw and nsw stand for "No Unsigned Wrap" - and "No Signed Wrap", respectively. If the nuw and/or - nsw keywords are present, the result value of the add - is a poison value if unsigned and/or signed overflow, - respectively, occurs.

- -
Example:
-
-  <result> = add i32 4, %var          ; yields {i32}:result = 4 + %var
-
- -
- - -

- 'fadd' Instruction -

- -
- -
Syntax:
-
-  <result> = fadd <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'fadd' instruction returns the sum of its two operands.

- -
Arguments:
-

The two arguments to the 'fadd' instruction must be - floating point or vector of - floating point values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the floating point sum of the two operands.

- -
Example:
-
-  <result> = fadd float 4.0, %var          ; yields {float}:result = 4.0 + %var
-
- -
- - -

- 'sub' Instruction -

- -
- -
Syntax:
-
-  <result> = sub <ty> <op1>, <op2>          ; yields {ty}:result
-  <result> = sub nuw <ty> <op1>, <op2>      ; yields {ty}:result
-  <result> = sub nsw <ty> <op1>, <op2>      ; yields {ty}:result
-  <result> = sub nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
-
- -
Overview:
-

The 'sub' instruction returns the difference of its two - operands.

- -

Note that the 'sub' instruction is used to represent the - 'neg' instruction present in most other intermediate - representations.

- -
Arguments:
-

The two arguments to the 'sub' instruction must - be integer or vector of - integer values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the integer difference of the two operands.

- -

If the difference has unsigned overflow, the result returned is the - mathematical result modulo 2^n, where n is the bit width of the - result.

- -

Because LLVM integers use a two's complement representation, this instruction - is appropriate for both signed and unsigned integers.

- -

nuw and nsw stand for "No Unsigned Wrap" - and "No Signed Wrap", respectively. If the nuw and/or - nsw keywords are present, the result value of the sub - is a poison value if unsigned and/or signed overflow, - respectively, occurs.

- -
Example:
-
-  <result> = sub i32 4, %var          ; yields {i32}:result = 4 - %var
-  <result> = sub i32 0, %var          ; yields {i32}:result = -%var
-
- -
- - -

- 'fsub' Instruction -

- -
- -
Syntax:
-
-  <result> = fsub <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'fsub' instruction returns the difference of its two - operands.

- -

Note that the 'fsub' instruction is used to represent the - 'fneg' instruction present in most other intermediate - representations.

- -
Arguments:
-

The two arguments to the 'fsub' instruction must be - floating point or vector of - floating point values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the floating point difference of the two operands.

- -
Example:
-
-  <result> = fsub float 4.0, %var           ; yields {float}:result = 4.0 - %var
-  <result> = fsub float -0.0, %var          ; yields {float}:result = -%var
-
- -
- - -

- 'mul' Instruction -

- -
- -
Syntax:
-
-  <result> = mul <ty> <op1>, <op2>          ; yields {ty}:result
-  <result> = mul nuw <ty> <op1>, <op2>      ; yields {ty}:result
-  <result> = mul nsw <ty> <op1>, <op2>      ; yields {ty}:result
-  <result> = mul nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
-
- -
Overview:
-

The 'mul' instruction returns the product of its two operands.

- -
Arguments:
-

The two arguments to the 'mul' instruction must - be integer or vector of - integer values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the integer product of the two operands.

- -

If the result of the multiplication has unsigned overflow, the result - returned is the mathematical result modulo 2^n, where n is the bit - width of the result.

- -

Because LLVM integers use a two's complement representation, and the result - is the same width as the operands, this instruction returns the correct - result for both signed and unsigned integers. If a full product - (e.g. i32xi32->i64) is needed, the operands should - be sign-extended or zero-extended as appropriate to the width of the full - product.

- -

nuw and nsw stand for "No Unsigned Wrap" - and "No Signed Wrap", respectively. If the nuw and/or - nsw keywords are present, the result value of the mul - is a poison value if unsigned and/or signed overflow, - respectively, occurs.

- -
Example:
-
-  <result> = mul i32 4, %var          ; yields {i32}:result = 4 * %var
-
- -
- - -

- 'fmul' Instruction -

- -
- -
Syntax:
-
-  <result> = fmul <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'fmul' instruction returns the product of its two operands.

- -
Arguments:
-

The two arguments to the 'fmul' instruction must be - floating point or vector of - floating point values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the floating point product of the two operands.

- -
Example:
-
-  <result> = fmul float 4.0, %var          ; yields {float}:result = 4.0 * %var
-
- -
- - -

- 'udiv' Instruction -

- -
- -
Syntax:
-
-  <result> = udiv <ty> <op1>, <op2>         ; yields {ty}:result
-  <result> = udiv exact <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'udiv' instruction returns the quotient of its two operands.

- -
Arguments:
-

The two arguments to the 'udiv' instruction must be - integer or vector of integer - values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the unsigned integer quotient of the two operands.

- -

Note that unsigned integer division and signed integer division are distinct - operations; for signed integer division, use 'sdiv'.

- -

Division by zero leads to undefined behavior.

- -

If the exact keyword is present, the result value of the - udiv is a poison value if %op1 is not a - multiple of %op2 (as such, "((a udiv exact b) mul b) == a").

- - -
Example:
-
-  <result> = udiv i32 4, %var          ; yields {i32}:result = 4 / %var
-
- -
- - -

- 'sdiv' Instruction -

- -
- -
Syntax:
-
-  <result> = sdiv <ty> <op1>, <op2>         ; yields {ty}:result
-  <result> = sdiv exact <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'sdiv' instruction returns the quotient of its two operands.

- -
Arguments:
-

The two arguments to the 'sdiv' instruction must be - integer or vector of integer - values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the signed integer quotient of the two operands rounded - towards zero.

- -

Note that signed integer division and unsigned integer division are distinct - operations; for unsigned integer division, use 'udiv'.

- -

Division by zero leads to undefined behavior. Overflow also leads to - undefined behavior; this is a rare case, but can occur, for example, by doing - a 32-bit division of -2147483648 by -1.

- -

If the exact keyword is present, the result value of the - sdiv is a poison value if the result would - be rounded.

- -
Example:
-
-  <result> = sdiv i32 4, %var          ; yields {i32}:result = 4 / %var
-
- -
- - -

- 'fdiv' Instruction -

- -
- -
Syntax:
-
-  <result> = fdiv <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'fdiv' instruction returns the quotient of its two operands.

- -
Arguments:
-

The two arguments to the 'fdiv' instruction must be - floating point or vector of - floating point values. Both arguments must have identical types.

- -
Semantics:
-

The value produced is the floating point quotient of the two operands.

- -
Example:
-
-  <result> = fdiv float 4.0, %var          ; yields {float}:result = 4.0 / %var
-
- -
- - -

- 'urem' Instruction -

- -
- -
Syntax:
-
-  <result> = urem <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'urem' instruction returns the remainder from the unsigned - division of its two arguments.

- -
Arguments:
-

The two arguments to the 'urem' instruction must be - integer or vector of integer - values. Both arguments must have identical types.

- -
Semantics:
-

This instruction returns the unsigned integer remainder of a division. - This instruction always performs an unsigned division to get the - remainder.

- -

Note that unsigned integer remainder and signed integer remainder are - distinct operations; for signed integer remainder, use 'srem'.

- -

Taking the remainder of a division by zero leads to undefined behavior.

- -
Example:
-
-  <result> = urem i32 4, %var          ; yields {i32}:result = 4 % %var
-
- -
- - -

- 'srem' Instruction -

- -
- -
Syntax:
-
-  <result> = srem <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'srem' instruction returns the remainder from the signed - division of its two operands. This instruction can also take - vector versions of the values in which case the - elements must be integers.

- -
Arguments:
-

The two arguments to the 'srem' instruction must be - integer or vector of integer - values. Both arguments must have identical types.

- -
Semantics:
-

This instruction returns the remainder of a division (where the result - is either zero or has the same sign as the dividend, op1), not the - modulo operator (where the result is either zero or has the same sign - as the divisor, op2) of a value. - For more information about the difference, - see The - Math Forum. For a table of how this is implemented in various languages, - please see - Wikipedia: modulo operation.

- -

Note that signed integer remainder and unsigned integer remainder are - distinct operations; for unsigned integer remainder, use 'urem'.

- -

Taking the remainder of a division by zero leads to undefined behavior. - Overflow also leads to undefined behavior; this is a rare case, but can - occur, for example, by taking the remainder of a 32-bit division of - -2147483648 by -1. (The remainder doesn't actually overflow, but this rule - lets srem be implemented using instructions that return both the result of - the division and the remainder.)

- -
Example:
-
-  <result> = srem i32 4, %var          ; yields {i32}:result = 4 % %var
-
- -
- - -

- 'frem' Instruction -

- -
- -
Syntax:
-
-  <result> = frem <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'frem' instruction returns the remainder from the division of - its two operands.

- -
Arguments:
-

The two arguments to the 'frem' instruction must be - floating point or vector of - floating point values. Both arguments must have identical types.

- -
Semantics:
-

This instruction returns the remainder of a division. The remainder - has the same sign as the dividend.

- -
Example:
-
-  <result> = frem float 4.0, %var          ; yields {float}:result = 4.0 % %var
-
- -
- -
- - -

- Bitwise Binary Operations -

- -
- -

Bitwise binary operators are used to do various forms of bit-twiddling in a - program. They are generally very efficient instructions and can commonly be - strength reduced from other instructions. They require two operands of the - same type, execute an operation on them, and produce a single value. The - resulting value is the same type as its operands.

- - -

- 'shl' Instruction -

- -
- -
Syntax:
-
-  <result> = shl <ty> <op1>, <op2>           ; yields {ty}:result
-  <result> = shl nuw <ty> <op1>, <op2>       ; yields {ty}:result
-  <result> = shl nsw <ty> <op1>, <op2>       ; yields {ty}:result
-  <result> = shl nuw nsw <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'shl' instruction returns the first operand shifted to the left - a specified number of bits.

- -
Arguments:
-

Both arguments to the 'shl' instruction must be the - same integer or vector of - integer type. 'op2' is treated as an unsigned value.

- -
Semantics:
-

The value produced is op1 * 2^op2 mod - 2^n, where n is the width of the result. If op2 - is (statically or dynamically) negative or equal to or larger than the number - of bits in op1, the result is undefined. If the arguments are - vectors, each vector element of op1 is shifted by the corresponding - shift amount in op2.

- -

If the nuw keyword is present, then the shift produces a - poison value if it shifts out any non-zero bits. If - the nsw keyword is present, then the shift produces a - poison value if it shifts out any bits that disagree - with the resultant sign bit. As such, NUW/NSW have the same semantics as - they would if the shift were expressed as a mul instruction with the same - nsw/nuw bits in (mul %op1, (shl 1, %op2)).

- -
Example:
-
-  <result> = shl i32 4, %var   ; yields {i32}: 4 << %var
-  <result> = shl i32 4, 2      ; yields {i32}: 16
-  <result> = shl i32 1, 10     ; yields {i32}: 1024
-  <result> = shl i32 1, 32     ; undefined
-  <result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 2, i32 4>
-
- -
- - -

- 'lshr' Instruction -

- -
- -
Syntax:
-
-  <result> = lshr <ty> <op1>, <op2>         ; yields {ty}:result
-  <result> = lshr exact <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'lshr' instruction (logical shift right) returns the first - operand shifted to the right a specified number of bits with zero fill.

- -
Arguments:
-

Both arguments to the 'lshr' instruction must be the same - integer or vector of integer - type. 'op2' is treated as an unsigned value.

- -
Semantics:
-

This instruction always performs a logical shift right operation. The most - significant bits of the result will be filled with zero bits after the shift. - If op2 is (statically or dynamically) equal to or larger than the - number of bits in op1, the result is undefined. If the arguments are - vectors, each vector element of op1 is shifted by the corresponding - shift amount in op2.

- -

If the exact keyword is present, the result value of the - lshr is a poison value if any of the bits - shifted out are non-zero.

- - -
Example:
-
-  <result> = lshr i32 4, 1   ; yields {i32}:result = 2
-  <result> = lshr i32 4, 2   ; yields {i32}:result = 1
-  <result> = lshr i8  4, 3   ; yields {i8}:result = 0
-  <result> = lshr i8 -2, 1   ; yields {i8}:result = 0x7F
-  <result> = lshr i32 1, 32  ; undefined
-  <result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>
-
- -
- - -

- 'ashr' Instruction -

- -
- -
Syntax:
-
-  <result> = ashr <ty> <op1>, <op2>         ; yields {ty}:result
-  <result> = ashr exact <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'ashr' instruction (arithmetic shift right) returns the first - operand shifted to the right a specified number of bits with sign - extension.

- -
Arguments:
-

Both arguments to the 'ashr' instruction must be the same - integer or vector of integer - type. 'op2' is treated as an unsigned value.

- -
Semantics:
-

This instruction always performs an arithmetic shift right operation. The - most significant bits of the result will be filled with the sign bit - of op1. If op2 is (statically or dynamically) equal to or - larger than the number of bits in op1, the result is undefined. If - the arguments are vectors, each vector element of op1 is shifted by - the corresponding shift amount in op2.

- -

If the exact keyword is present, the result value of the - ashr is a poison value if any of the bits - shifted out are non-zero.

- -
Example:
-
-  <result> = ashr i32 4, 1   ; yields {i32}:result = 2
-  <result> = ashr i32 4, 2   ; yields {i32}:result = 1
-  <result> = ashr i8  4, 3   ; yields {i8}:result = 0
-  <result> = ashr i8 -2, 1   ; yields {i8}:result = -1
-  <result> = ashr i32 1, 32  ; undefined
-  <result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3>   ; yields: result=<2 x i32> < i32 -1, i32 0>
-
- -
- - -

- 'and' Instruction -

- -
- -
Syntax:
-
-  <result> = and <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'and' instruction returns the bitwise logical and of its two - operands.

- -
Arguments:
-

The two arguments to the 'and' instruction must be - integer or vector of integer - values. Both arguments must have identical types.

- -
Semantics:
-

The truth table used for the 'and' instruction is:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
In0 | In1 | Out
0   | 0   | 0
0   | 1   | 0
1   | 0   | 0
1   | 1   | 1
- -
Example:
-
-  <result> = and i32 4, %var         ; yields {i32}:result = 4 & %var
-  <result> = and i32 15, 40          ; yields {i32}:result = 8
-  <result> = and i32 4, 8            ; yields {i32}:result = 0
-
-
- -

- 'or' Instruction -

- -
- -
Syntax:
-
-  <result> = or <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'or' instruction returns the bitwise logical inclusive or of its - two operands.

- -
Arguments:
-

The two arguments to the 'or' instruction must be - integer or vector of integer - values. Both arguments must have identical types.

- -
Semantics:
-

The truth table used for the 'or' instruction is:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
In0 | In1 | Out
0   | 0   | 0
0   | 1   | 1
1   | 0   | 1
1   | 1   | 1
- -
Example:
-
-  <result> = or i32 4, %var         ; yields {i32}:result = 4 | %var
-  <result> = or i32 15, 40          ; yields {i32}:result = 47
-  <result> = or i32 4, 8            ; yields {i32}:result = 12
-
- -
- - -

- 'xor' Instruction -

- -
- -
Syntax:
-
-  <result> = xor <ty> <op1>, <op2>   ; yields {ty}:result
-
- -
Overview:
-

The 'xor' instruction returns the bitwise logical exclusive or of - its two operands. The xor is used to implement the "one's - complement" operation, which is the "~" operator in C.

- -
Arguments:
-

The two arguments to the 'xor' instruction must be - integer or vector of integer - values. Both arguments must have identical types.

- -
Semantics:
-

The truth table used for the 'xor' instruction is:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
In0 | In1 | Out
0   | 0   | 0
0   | 1   | 1
1   | 0   | 1
1   | 1   | 0
- -
Example:
-
-  <result> = xor i32 4, %var         ; yields {i32}:result = 4 ^ %var
-  <result> = xor i32 15, 40          ; yields {i32}:result = 39
-  <result> = xor i32 4, 8            ; yields {i32}:result = 12
-  <result> = xor i32 %V, -1          ; yields {i32}:result = ~%V
-
- -
- -
- - -

- Vector Operations -

- -
- -

LLVM supports several instructions to represent vector operations in a - target-independent manner. These instructions cover the element-access and - vector-specific operations needed to process vectors effectively. While LLVM - does directly support these vector operations, many sophisticated algorithms - will want to use target-specific intrinsics to take full advantage of a - specific target.

- - -

- 'extractelement' Instruction -

- -
- -
Syntax:
-
-  <result> = extractelement <n x <ty>> <val>, i32 <idx>    ; yields <ty>
-
- -
Overview:
-

The 'extractelement' instruction extracts a single scalar element - from a vector at a specified index.

- - -
Arguments:
-

The first operand of an 'extractelement' instruction is a value - of vector type. The second operand is an index - indicating the position from which to extract the element. The index may be - a variable.

- -
Semantics:
-

The result is a scalar of the same type as the element type of - val. Its value is the value at position idx of - val. If idx exceeds the length of val, the - results are undefined.

- -
Example:
-
-  <result> = extractelement <4 x i32> %vec, i32 0    ; yields i32
-
- -
- - -

- 'insertelement' Instruction -

- -
- -
Syntax:
-
-  <result> = insertelement <n x <ty>> <val>, <ty> <elt>, i32 <idx>    ; yields <n x <ty>>
-
- -
Overview:
-

The 'insertelement' instruction inserts a scalar element into a - vector at a specified index.

- -
Arguments:
-

The first operand of an 'insertelement' instruction is a value - of vector type. The second operand is a scalar value - whose type must equal the element type of the first operand. The third - operand is an index indicating the position at which to insert the value. - The index may be a variable.

- -
Semantics:
-

The result is a vector of the same type as val. Its element values - are those of val except at position idx, where it gets the - value elt. If idx exceeds the length of val, the - results are undefined.

- -
Example:
-
-  <result> = insertelement <4 x i32> %vec, i32 1, i32 0    ; yields <4 x i32>
-
- -
- - -

- 'shufflevector' Instruction -

- -
- -
Syntax:
-
-  <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>    ; yields <m x <ty>>
-
- -
Overview:
-

The 'shufflevector' instruction constructs a permutation of elements - from two input vectors, returning a vector with the same element type as the - input and length that is the same as the shuffle mask.

- -
Arguments:
-

The first two operands of a 'shufflevector' instruction are vectors - with the same type. The third argument is a shuffle mask whose - element type is always 'i32'. The result of the instruction is a vector - whose length is the same as the shuffle mask and whose element type is the - same as the element type of the first two operands.

- -

The shuffle mask operand is required to be a constant vector with either - constant integer or undef values.

- -
Semantics:
-

The elements of the two input vectors are numbered from left to right across - both of the vectors. The shuffle mask operand specifies, for each element of - the result vector, which element of the two input vectors the result element - gets. The element selector may be undef (meaning "don't care") and the - second operand may be undef if performing a shuffle from only one vector.

- -
Example:
-
-  <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
-                          <4 x i32> <i32 0, i32 4, i32 1, i32 5>  ; yields <4 x i32>
-  <result> = shufflevector <4 x i32> %v1, <4 x i32> undef,
-                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32> - Identity shuffle.
-  <result> = shufflevector <8 x i32> %v1, <8 x i32> undef,
-                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32>
-  <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
-                          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >  ; yields <8 x i32>
-
- -
- -
- - -

- Aggregate Operations -

- -
- -

LLVM supports several instructions for working with - aggregate values.

- - -

- 'extractvalue' Instruction -

- -
- -
Syntax:
-
-  <result> = extractvalue <aggregate type> <val>, <idx>{, <idx>}*
-
- -
Overview:
-

The 'extractvalue' instruction extracts the value of a member field - from an aggregate value.

- -
Arguments:
-

The first operand of an 'extractvalue' instruction is a value - of struct or - array type. The operands are constant indices to - specify which value to extract in a similar manner as indices in a - 'getelementptr' instruction.

-

The major differences to getelementptr indexing are:

-
    -
  • Since the value being indexed is not a pointer, the first index is - omitted and assumed to be zero.
  • -
  • At least one index must be specified.
  • -
  • Not only struct indices but also array indices must be in - bounds.
  • -
- -
Semantics:
-

The result is the value at the position in the aggregate specified by the - index operands.

- -
Example:
-
-  <result> = extractvalue {i32, float} %agg, 0    ; yields i32
-
- -
- - -

- 'insertvalue' Instruction -

- -
- -
Syntax:
-
-  <result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>{, <idx>}*    ; yields <aggregate type>
-
- -
Overview:
-

The 'insertvalue' instruction inserts a value into a member field - in an aggregate value.

- -
Arguments:
-

The first operand of an 'insertvalue' instruction is a value - of struct or - array type. The second operand is a first-class - value to insert. The following operands are constant indices indicating - the position at which to insert the value in a similar manner as indices in an - 'extractvalue' instruction. The - value to insert must have the same type as the value identified by the - indices.

- -
Semantics:
-

The result is an aggregate of the same type as val. Its value is - that of val except that the value at the position specified by the - indices is that of elt.

- -
Example:
-
-  %agg1 = insertvalue {i32, float} undef, i32 1, 0              ; yields {i32 1, float undef}
-  %agg2 = insertvalue {i32, float} %agg1, float %val, 1         ; yields {i32 1, float %val}
-  %agg3 = insertvalue {i32, {float}} undef, float %val, 1, 0    ; yields {i32 undef, {float %val}}
-
- -
- -
- - -

- Memory Access and Addressing Operations -

- -
- -

A key design point of an SSA-based representation is how it represents - memory. In LLVM, no memory locations are in SSA form, which makes things - very simple. This section describes how to read, write, and allocate - memory in LLVM.

- - -

- 'alloca' Instruction -

- -
- -
Syntax:
-
-  <result> = alloca <type>[, <ty> <NumElements>][, align <alignment>]     ; yields {type*}:result
-
- -
Overview:
-

The 'alloca' instruction allocates memory on the stack frame of the - currently executing function, to be automatically released when this function - returns to its caller. The object is always allocated in the generic address - space (address space zero).

- -
Arguments:
-

The 'alloca' instruction - allocates sizeof(<type>)*NumElements bytes of memory on the - runtime stack, returning a pointer of the appropriate type to the program. - If "NumElements" is specified, it is the number of elements allocated, - otherwise "NumElements" is defaulted to be one. If a constant alignment is - specified, the value result of the allocation is guaranteed to be aligned to - at least that boundary. If not specified, or if zero, the target can choose - to align the allocation on any convenient boundary compatible with the - type.

- -

'type' may be any sized type.

- -
Semantics:
-

Memory is allocated; a pointer is returned. The operation is undefined if - there is insufficient stack space for the allocation. 'alloca'd - memory is automatically released when the function returns. The - 'alloca' instruction is commonly used to represent automatic - variables that must have an address available. When the function returns - (either with the ret - or resume instructions), the memory is - reclaimed. Allocating zero bytes is legal, but the result is undefined. - The order in which memory is allocated (i.e., which way the stack grows) is - not specified.

- -

- -

Example:
-
-  %ptr = alloca i32                             ; yields {i32*}:ptr
-  %ptr = alloca i32, i32 4                      ; yields {i32*}:ptr
-  %ptr = alloca i32, i32 4, align 1024          ; yields {i32*}:ptr
-  %ptr = alloca i32, align 1024                 ; yields {i32*}:ptr
-
- -
- - -

- 'load' Instruction -

- -
- -
Syntax:
-
-  <result> = load [volatile] <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>]
-  <result> = load atomic [volatile] <ty>* <pointer> [singlethread] <ordering>, align <alignment>
-  !<index> = !{ i32 1 }
-
- -
Overview:
-

The 'load' instruction is used to read from memory.

- -
Arguments:
-

The argument to the 'load' instruction specifies the memory address - from which to load. The pointer must point to - a first class type. If the load is - marked as volatile, then the optimizer is not allowed to modify the - number or order of execution of this load with other volatile operations.

- -

If the load is marked as atomic, it takes an extra - ordering and optional singlethread - argument. The release and acq_rel orderings are - not valid on load instructions. Atomic loads produce defined results when they may see multiple atomic - stores. The type of the pointee must be an integer type whose bit width - is a power of two greater than or equal to eight and less than or equal - to a target-specific size limit. align must be explicitly - specified on atomic loads, and the load has undefined behavior if the - alignment is not set to a value which is at least the size in bytes of - the pointee. !nontemporal does not have any defined semantics - for atomic loads.

- -

The optional constant align argument specifies the alignment of the - operation (that is, the alignment of the memory address). A value of 0 or an - omitted align argument means that the operation has the abi - alignment for the target. It is the responsibility of the code emitter to - ensure that the alignment information is correct. Overestimating the - alignment results in undefined behavior. Underestimating the alignment may - produce less efficient code. An alignment of 1 is always safe.

- -

The optional !nontemporal metadata must reference a single - metadata name <index> corresponding to a metadata node with - one i32 entry of value 1. The existence of - the !nontemporal metadata on the instruction tells the optimizer - and code generator that this load is not expected to be reused in the cache. - The code generator may select special instructions to save cache bandwidth, - such as the MOVNT instruction on x86.

- -

The optional !invariant.load metadata must reference a single - metadata name <index> corresponding to a metadata node with no - entries. The existence of the !invariant.load metadata on the - instruction tells the optimizer and code generator that this load address - points to memory which does not change value during program execution. - The optimizer may then move this load around, for example, by hoisting it - out of loops using loop invariant code motion.

- -
Semantics:
-

The location of memory pointed to is loaded. If the value being loaded is of - scalar type then the number of bytes read does not exceed the minimum number - of bytes needed to hold all bits of the type. For example, loading an - i24 reads at most three bytes. When loading a value of a type like - i20 with a size that is not an integral number of bytes, the result - is undefined if the value was not originally written using a store of the - same type.

- -
Examples:
-
-  %ptr = alloca i32                               ; yields {i32*}:ptr
-  store i32 3, i32* %ptr                          ; yields {void}
-  %val = load i32* %ptr                           ; yields {i32}:val = i32 3
-
- -
- - -

- 'store' Instruction -

- -
- -
Syntax:
-
-  store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]        ; yields {void}
-  store atomic [volatile] <ty> <value>, <ty>* <pointer> [singlethread] <ordering>, align <alignment>  ; yields {void}
-
- -
Overview:
-

The 'store' instruction is used to write to memory.

- -
Arguments:
-

There are two arguments to the 'store' instruction: a value to store - and an address at which to store it. The type of the - '<pointer>' operand must be a pointer to - the first class type of the - '<value>' operand. If the store is marked as - volatile, then the optimizer is not allowed to modify the number or - order of execution of this store with other volatile operations.

- -

If the store is marked as atomic, it takes an extra - ordering and optional singlethread - argument. The acquire and acq_rel orderings aren't - valid on store instructions. Atomic loads produce defined results when they may see multiple atomic - stores. The type of the pointee must be an integer type whose bit width - is a power of two greater than or equal to eight and less than or equal - to a target-specific size limit. align must be explicitly - specified on atomic stores, and the store has undefined behavior if the - alignment is not set to a value which is at least the size in bytes of - the pointee. !nontemporal does not have any defined semantics - for atomic stores.

- -

The optional constant "align" argument specifies the alignment of the - operation (that is, the alignment of the memory address). A value of 0 or an - omitted "align" argument means that the operation has the abi - alignment for the target. It is the responsibility of the code emitter to - ensure that the alignment information is correct. Overestimating the - alignment results in undefined behavior. Underestimating the alignment may - produce less efficient code. An alignment of 1 is always safe.

- -

The optional !nontemporal metadata must reference a single metadata - name <index> corresponding to a metadata node with one i32 entry of - value 1. The existence of the !nontemporal metadata on the - instruction tells the optimizer and code generator that this store is - not expected to be reused in the cache. The code generator may - select special instructions to save cache bandwidth, such as the - MOVNT instruction on x86.

- - -
Semantics:
-

The contents of memory are updated to contain '<value>' at the - location specified by the '<pointer>' operand. If - '<value>' is of scalar type then the number of bytes written - does not exceed the minimum number of bytes needed to hold all bits of the - type. For example, storing an i24 writes at most three bytes. When - writing a value of a type like i20 with a size that is not an - integral number of bytes, it is unspecified what happens to the extra bits - that do not belong to the type, but they will typically be overwritten.

- -
Example:
-
-  %ptr = alloca i32                               ; yields {i32*}:ptr
-  store i32 3, i32* %ptr                          ; yields {void}
-  %val = load i32* %ptr                           ; yields {i32}:val = i32 3
-
- -
- - -

-'fence' Instruction -

- -
- -
Syntax:
-
-  fence [singlethread] <ordering>                   ; yields {void}
-
- -
Overview:
-

The 'fence' instruction is used to introduce happens-before edges -between operations.

- -
Arguments:

'fence' instructions take an ordering argument which defines what -synchronizes-with edges they add. They can only be given -acquire, release, acq_rel, and -seq_cst orderings.

- -
Semantics:
-

A fence A which has (at least) release ordering -semantics synchronizes with a fence B with (at least) -acquire ordering semantics if and only if there exist atomic -operations X and Y, both operating on some atomic object -M, such that A is sequenced before X, -X modifies M (either directly or through some side effect -of a sequence headed by X), Y is sequenced before -B, and Y observes M. This provides a -happens-before dependency between A and B. Rather -than an explicit fence, one (but not both) of the atomic operations -X or Y might provide a release or -acquire (resp.) ordering constraint and still -synchronize-with the explicit fence and establish the -happens-before edge.

- -

A fence which has seq_cst ordering, in addition to -having both acquire and release semantics specified -above, participates in the global program order of other seq_cst -operations and/or fences.

- -

The optional "singlethread" argument -specifies that the fence only synchronizes with other fences in the same -thread. (This is useful for interacting with signal handlers.)

- -
Example:
-
-  fence acquire                          ; yields {void}
-  fence singlethread seq_cst             ; yields {void}
-
- -
- - -

-'cmpxchg' Instruction -

- -
- -
Syntax:
-
-  cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>  ; yields {ty}
-
- -
Overview:
-

The 'cmpxchg' instruction is used to atomically modify memory. -It loads a value in memory and compares it to a given value. If they are -equal, it stores a new value into the memory.

- -
Arguments:
-

There are three arguments to the 'cmpxchg' instruction: an -address to operate on, a value to compare to the value currently at that -address, and a new value to place at that address if the compared values are -equal. The type of '<cmp>' must be an integer type whose -bit width is a power of two greater than or equal to eight and less than -or equal to a target-specific size limit. '<cmp>' and -'<new>' must have the same type, and the type of -'<pointer>' must be a pointer to that type. If the -cmpxchg is marked as volatile, then the -optimizer is not allowed to modify the number or order of execution -of this cmpxchg with other volatile -operations.

- - - -

The ordering argument specifies how this -cmpxchg synchronizes with other atomic operations.

- -

The optional "singlethread" argument declares that the -cmpxchg is only atomic with respect to code (usually signal -handlers) running in the same thread as the cmpxchg. Otherwise the -cmpxchg is atomic with respect to all other code in the system.

- -

The pointer passed into cmpxchg must have alignment greater than or equal to -the size in memory of the operand. - -

Semantics:
-

The contents of memory at the location specified by the -'<pointer>' operand are read and compared to -'<cmp>'; if the read value is equal, -'<new>' is written. The original value at the location -is returned. - -

A successful cmpxchg is a read-modify-write instruction for the -purpose of identifying release sequences. A -failed cmpxchg is equivalent to an atomic load with an ordering -parameter determined by dropping any release part of the -cmpxchg's ordering.

- - - -
Example:
-
-entry:
-  %orig = load atomic i32* %ptr unordered, align 4          ; yields {i32}
-  br label %loop
-
-loop:
-  %cmp = phi i32 [ %orig, %entry ], [%old, %loop]
-  %squared = mul i32 %cmp, %cmp
-  %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared          ; yields {i32}
-  %success = icmp eq i32 %cmp, %old
-  br i1 %success, label %done, label %loop
-
-done:
-  ...
-
- -
- - -

-'atomicrmw' Instruction -

- -
- -
Syntax:
-
-  atomicrmw [volatile] <operation> <ty>* <pointer>, <ty> <value> [singlethread] <ordering>                   ; yields {ty}
-
- -
Overview:
-

The 'atomicrmw' instruction is used to atomically modify memory.

- -
Arguments:
-

There are three arguments to the 'atomicrmw' instruction: an -operation to apply, an address whose value to modify, and an argument to the -operation. The operation must be one of the following keywords:

-
    -
  • xchg
  • -
  • add
  • -
  • sub
  • -
  • and
  • -
  • nand
  • -
  • or
  • -
  • xor
  • -
  • max
  • -
  • min
  • -
  • umax
  • -
  • umin
  • -
- -

The type of '<value>' must be an integer type whose -bit width is a power of two greater than or equal to eight and less than -or equal to a target-specific size limit. The type of the -'<pointer>' operand must be a pointer to that type. -If the atomicrmw is marked as volatile, then the -optimizer is not allowed to modify the number or order of execution of this -atomicrmw with other volatile - operations.

- - - -
Semantics:
-

The contents of memory at the location specified by the -'<pointer>' operand are atomically read, modified, and written -back. The original value at the location is returned. The modification is -specified by the operation argument:

- -
    -
  • xchg: *ptr = val
  • -
  • add: *ptr = *ptr + val
  • -
  • sub: *ptr = *ptr - val
  • -
  • and: *ptr = *ptr & val
  • -
  • nand: *ptr = ~(*ptr & val)
  • -
  • or: *ptr = *ptr | val
  • -
  • xor: *ptr = *ptr ^ val
  • -
  • max: *ptr = *ptr > val ? *ptr : val (using a signed comparison)
  • -
  • min: *ptr = *ptr < val ? *ptr : val (using a signed comparison)
  • -
  • umax: *ptr = *ptr > val ? *ptr : val (using an unsigned comparison)
  • -
  • umin: *ptr = *ptr < val ? *ptr : val (using an unsigned comparison)
  • -
- -
Example:
-
-  %old = atomicrmw add i32* %ptr, i32 1 acquire                        ; yields {i32}
-
- -
- - -

- 'getelementptr' Instruction -

- -
- -
Syntax:
-
-  <result> = getelementptr <pty>* <ptrval>{, <ty> <idx>}*
-  <result> = getelementptr inbounds <pty>* <ptrval>{, <ty> <idx>}*
-  <result> = getelementptr <ptr vector> ptrval, <vector index type> idx 
-
- -
Overview:
-

The 'getelementptr' instruction is used to get the address of a - subelement of an aggregate data structure. - It performs address calculation only and does not access memory.

- -
Arguments:
-

The first argument is always a pointer or a vector of pointers, - and forms the basis of the - calculation. The remaining arguments are indices that indicate which of the - elements of the aggregate object are indexed. The interpretation of each - index is dependent on the type being indexed into. The first index always - indexes the pointer value given as the first argument, the second index - indexes a value of the type pointed to (not necessarily the value directly - pointed to, since the first index can be non-zero), etc. The first type - indexed into must be a pointer value, subsequent types can be arrays, - vectors, and structs. Note that subsequent types being indexed into - can never be pointers, since that would require loading the pointer before - continuing calculation.

- -

The type of each index argument depends on the type it is indexing into. - When indexing into an (optionally packed) structure, only i32 - integer constants are allowed. When indexing into an array, pointer - or vector, integers of any width are allowed, and they are not required to be - constant. These integers are treated as signed values where relevant.

- -

For example, let's consider a C code fragment and how it gets compiled to - LLVM:

- -
-struct RT {
-  char A;
-  int B[10][20];
-  char C;
-};
-struct ST {
-  int X;
-  double Y;
-  struct RT Z;
-};
-
-int *foo(struct ST *s) {
-  return &s[1].Z.B[5][13];
-}
-
- -

The LLVM code generated by Clang is:

- -
-%struct.RT = type { i8, [10 x [20 x i32]], i8 }
-%struct.ST = type { i32, double, %struct.RT }
-
-define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
-entry:
-  %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
-  ret i32* %arrayidx
-}
-
- -
Semantics:
-

In the example above, the first index is indexing into the - '%struct.ST*' type, which is a pointer, yielding a - '%struct.ST' = '{ i32, double, %struct.RT }' type, a - structure. The second index indexes into the third element of the structure, - yielding a '%struct.RT' = '{ i8 , [10 x [20 x i32]], i8 }' - type, another structure. The third index indexes into the second element of - the structure, yielding a '[10 x [20 x i32]]' type, an array. The - two dimensions of the array are subscripted into, yielding an 'i32' - type. The 'getelementptr' instruction returns a pointer to this - element, thus computing a value of 'i32*' type.

- -

Note that it is perfectly legal to index partially through a structure, - returning a pointer to an inner element. Because of this, the LLVM code for - the given testcase is equivalent to:

- -
-define i32* @foo(%struct.ST* %s) {
-  %t1 = getelementptr %struct.ST* %s, i32 1                 ; yields %struct.ST*:%t1
-  %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2         ; yields %struct.RT*:%t2
-  %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1         ; yields [10 x [20 x i32]]*:%t3
-  %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5  ; yields [20 x i32]*:%t4
-  %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13        ; yields i32*:%t5
-  ret i32* %t5
-}
-
- -

If the inbounds keyword is present, the result value of the - getelementptr is a poison value if the - base pointer is not an in bounds address of an allocated object, - or if any of the addresses that would be formed by successive addition of - the offsets implied by the indices to the base address with infinitely - precise signed arithmetic are not an in bounds address of that - allocated object. The in bounds addresses for an allocated object - are all the addresses that point into the object, plus the address one - byte past the end. - In cases where the base is a vector of pointers the inbounds keyword - applies to each of the computations element-wise.

- -

If the inbounds keyword is not present, the offsets are added to - the base address with silently-wrapping two's complement arithmetic. If the - offsets have a different width from the pointer, they are sign-extended or - truncated to the width of the pointer. The result value of the - getelementptr may be outside the object pointed to by the base - pointer. The result value may not necessarily be used to access memory - though, even if it happens to point into allocated storage. See the - Pointer Aliasing Rules section for more - information.

- -

The getelementptr instruction is often confusing. For some more insight into - how it works, see the getelementptr FAQ.

- -
Example:
-
-    ; yields [12 x i8]*:aptr
-    %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
-    ; yields i8*:vptr
-    %vptr = getelementptr {i32, <2 x i8>}* %svptr, i64 0, i32 1, i32 1
-    ; yields i8*:eptr
-    %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1
-    ; yields i32*:iptr
-    %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
-
- -

In cases where the pointer argument is a vector of pointers, only a - single index may be used, and the number of vector elements has to be - the same. For example:

-
- %A = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
-
- -
- -
- - -

- Conversion Operations -

- -
- -

The instructions in this category are the conversion instructions (casting) - which all take a single operand and a type. They perform various bit - conversions on the operand.

- - -

- 'trunc .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = trunc <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'trunc' instruction truncates its operand to the - type ty2.

- -
Arguments:
-

The 'trunc' instruction takes a value to trunc, and a type to trunc it to. - Both types must be of integer types, or vectors - of the same number of integers. - The bit size of the value must be larger than - the bit size of the destination type, ty2. - Equal sized types are not allowed.

- -
Semantics:
-

The 'trunc' instruction truncates the high order bits - in value and converts the remaining bits to ty2. Since the - source size must be larger than the destination size, trunc cannot - be a no-op cast. It will always truncate bits.

- -
Example:
-
-  %X = trunc i32 257 to i8                        ; yields i8:1
-  %Y = trunc i32 123 to i1                        ; yields i1:true
-  %Z = trunc i32 122 to i1                        ; yields i1:false
-  %W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
-
- -
- - -

- 'zext .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = zext <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'zext' instruction zero extends its operand to type - ty2.

- - -
Arguments:
-

The 'zext' instruction takes a value to cast, and a type to cast it to. - Both types must be of integer types, or vectors - of the same number of integers. - The bit size of the value must be smaller than - the bit size of the destination type, - ty2.

- -
Semantics:
-

The zext fills the high order bits of the value with zero - bits until it reaches the size of the destination type, ty2.

- -

When zero extending from i1, the result will always be either 0 or 1.

- -
Example:
-
-  %X = zext i32 257 to i64              ; yields i64:257
-  %Y = zext i1 true to i32              ; yields i32:1
-  %Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
-
- -
- - -

- 'sext .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = sext <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'sext' sign extends value to the type ty2.

- -
Arguments:
-

The 'sext' instruction takes a value to cast, and a type to cast it to. - Both types must be of integer types, or vectors - of the same number of integers. - The bit size of the value must be smaller than - the bit size of the destination type, - ty2.

- -
Semantics:
-

The 'sext' instruction performs a sign extension by copying the sign - bit (highest order bit) of the value until it reaches the bit size - of the type ty2.

- -

When sign extending from i1, the extension always results in -1 or 0.

- -
Example:
-
-  %X = sext i8  -1 to i16              ; yields i16   :65535
-  %Y = sext i1 true to i32             ; yields i32:-1
-  %Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
-
- -
- - -

- 'fptrunc .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = fptrunc <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'fptrunc' instruction truncates value to type - ty2.

- -
Arguments:
-

The 'fptrunc' instruction takes a floating - point value to cast and a floating point type - to cast it to. The size of value must be larger than the size of - ty2. This implies that fptrunc cannot be used to make a - no-op cast.

- -
Semantics:
-

The 'fptrunc' instruction truncates a value from a larger - floating point type to a smaller - floating point type. If the value cannot fit - within the destination type, ty2, then the results are - undefined.

- -
Example:
-
-  %X = fptrunc double 123.0 to float         ; yields float:123.0
-  %Y = fptrunc double 1.0E+300 to float      ; yields undefined
-
- -
- - -

- 'fpext .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = fpext <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'fpext' extends a floating point value to a larger - floating point value.

- -
Arguments:
-

The 'fpext' instruction takes a - floating point value to cast, and - a floating point type to cast it to. The source - type must be smaller than the destination type.

- -
Semantics:
-

The 'fpext' instruction extends the value from a smaller - floating point type to a larger - floating point type. The fpext cannot be - used to make a no-op cast because it always changes bits. Use - bitcast to make a no-op cast for a floating point cast.

- -
Example:
-
-  %X = fpext float 3.125 to double         ; yields double:3.125000e+00
-  %Y = fpext double %X to fp128            ; yields fp128:0xL00000000000000004000900000000000
-
- -
- - -

- 'fptoui .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = fptoui <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'fptoui' converts a floating point value to its - unsigned integer equivalent of type ty2.

- -
Arguments:
-

The 'fptoui' instruction takes a value to cast, which must be a - scalar or vector floating point value, and a type - to cast it to ty2, which must be an integer - type. If ty is a vector floating point type, ty2 must be a - vector integer type with the same number of elements as ty.

- -
Semantics:
-

The 'fptoui' instruction converts its - floating point operand into the nearest (rounding - towards zero) unsigned integer value. If the value cannot fit - in ty2, the results are undefined.

- -
Example:
-
-  %X = fptoui double 123.0 to i32      ; yields i32:123
-  %Y = fptoui float 1.0E+300 to i1     ; yields undefined:1
-  %Z = fptoui float 1.04E+17 to i8     ; yields undefined:1
-
- -
- - -

- 'fptosi .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = fptosi <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'fptosi' instruction converts - floating point value to - type ty2.

- -
Arguments:
-

The 'fptosi' instruction takes a value to cast, which must be a - scalar or vector floating point value, and a type - to cast it to ty2, which must be an integer - type. If ty is a vector floating point type, ty2 must be a - vector integer type with the same number of elements as ty.

- -
Semantics:
-

The 'fptosi' instruction converts its - floating point operand into the nearest (rounding - towards zero) signed integer value. If the value cannot fit in ty2, - the results are undefined.

- -
Example:
-
-  %X = fptosi double -123.0 to i32      ; yields i32:-123
-  %Y = fptosi float 1.0E-247 to i1      ; yields undefined:1
-  %Z = fptosi float 1.04E+17 to i8      ; yields undefined:1
-
- -
- - -

- 'uitofp .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = uitofp <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'uitofp' instruction regards value as an unsigned - integer and converts that value to the ty2 type.

- -
Arguments:
-

The 'uitofp' instruction takes a value to cast, which must be a - scalar or vector integer value, and a type to cast - it to ty2, which must be a floating point - type. If ty is a vector integer type, ty2 must be a vector - floating point type with the same number of elements as ty.

- -
Semantics:
-

The 'uitofp' instruction interprets its operand as an unsigned - integer quantity and converts it to the corresponding floating point - value. If the value cannot fit in the floating point value, the results are - undefined.

- -
Example:
-
-  %X = uitofp i32 257 to float         ; yields float:257.0
-  %Y = uitofp i8 -1 to double          ; yields double:255.0
-
- -
- - -

- 'sitofp .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = sitofp <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'sitofp' instruction regards value as a signed integer - and converts that value to the ty2 type.

- -
Arguments:
-

The 'sitofp' instruction takes a value to cast, which must be a - scalar or vector integer value, and a type to cast - it to ty2, which must be a floating point - type. If ty is a vector integer type, ty2 must be a vector - floating point type with the same number of elements as ty.

- -
Semantics:
-

The 'sitofp' instruction interprets its operand as a signed integer - quantity and converts it to the corresponding floating point value. If the - value cannot fit in the floating point value, the results are undefined.

- -
Example:
-
-  %X = sitofp i32 257 to float         ; yields float:257.0
-  %Y = sitofp i8 -1 to double          ; yields double:-1.0
-
- -
- - -

- 'ptrtoint .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = ptrtoint <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'ptrtoint' instruction converts the pointer or a vector of - pointers value to - the integer (or vector of integers) type ty2.

- -
Arguments:
-

The 'ptrtoint' instruction takes a value to cast, which - must be a value of type pointer or a vector of - pointers, and a type to cast it to - ty2, which must be an integer or a vector - of integers type.

- -
Semantics:
-

The 'ptrtoint' instruction converts value to integer type - ty2 by interpreting the pointer value as an integer and either - truncating or zero extending that value to the size of the integer type. If - value is smaller than ty2 then a zero extension is done. If - value is larger than ty2 then a truncation is done. If they - are the same size, then nothing is done (no-op cast) other than a type - change.

- -
Example:
-
-  %X = ptrtoint i32* %P to i8                         ; yields truncation on 32-bit architecture
-  %Y = ptrtoint i32* %P to i64                        ; yields zero extension on 32-bit architecture
-  %Z = ptrtoint <4 x i32*> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture
-
- -
- - -

- 'inttoptr .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = inttoptr <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'inttoptr' instruction converts an integer value to a - pointer type, ty2.

- -
Arguments:
-

The 'inttoptr' instruction takes an integer - value to cast, and a type to cast it to, which must be a - pointer type.

- -
Semantics:
-

The 'inttoptr' instruction converts value to type - ty2 by applying either a zero extension or a truncation depending on - the size of the integer value. If value is larger than the - size of a pointer then a truncation is done. If value is smaller - than the size of a pointer then a zero extension is done. If they are the - same size, nothing is done (no-op cast).

- -
Example:
-
-  %X = inttoptr i32 255 to i32*          ; yields zero extension on 64-bit architecture
-  %Y = inttoptr i32 255 to i32*          ; yields no-op on 32-bit architecture
-  %Z = inttoptr i64 0 to i32*            ; yields truncation on 32-bit architecture
-  %Z = inttoptr <4 x i32> %G to <4 x i8*>; yields truncation of vector G to four pointers
-
- -
- - -

- 'bitcast .. to' Instruction -

- -
- -
Syntax:
-
-  <result> = bitcast <ty> <value> to <ty2>             ; yields ty2
-
- -
Overview:
-

The 'bitcast' instruction converts value to type - ty2 without changing any bits.

- -
Arguments:
-

The 'bitcast' instruction takes a value to cast, which must be a - non-aggregate first class value, and a type to cast it to, which must also be - a non-aggregate first class type. The bit sizes - of value and the destination type, ty2, must be - identical. If the source type is a pointer, the destination type must also be - a pointer. This instruction supports bitwise conversion of vectors to - integers and to vectors of other types (as long as they have the same - size).

- -
Semantics:
-

The 'bitcast' instruction converts value to type - ty2. It is always a no-op cast because no bits change with - this conversion. The conversion is done as if the value had been - stored to memory and read back as type ty2. - Pointer (or vector of pointers) types may only be converted to other pointer - (or vector of pointers) types with this instruction. To convert - pointers to other types, use the inttoptr or - ptrtoint instructions first.

- -
Example:
-
-  %X = bitcast i8 255 to i8              ; yields i8 :-1
-  %Y = bitcast i32* %x to i16*          ; yields i16*:%x
-  %Z = bitcast <2 x i32> %V to i64         ; yields i64: %V
-  %Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*>
-
- -
- -
- - -

- Other Operations -

- -
- -

The instructions in this category are the "miscellaneous" instructions, which - defy better classification.

- - -

- 'icmp' Instruction -

- -
- -
Syntax:
-
-  <result> = icmp <cond> <ty> <op1>, <op2>   ; yields {i1} or {<N x i1>}:result
-
- -
Overview:
-

The 'icmp' instruction returns a boolean value or a vector of - boolean values based on comparison of its two integer, integer vector, - pointer, or pointer vector operands.

- -
Arguments:
-

The 'icmp' instruction takes three operands. The first operand is - the condition code indicating the kind of comparison to perform. It is not a - value, just a keyword. The possible condition codes are:

- -
    -
  1. eq: equal
  2. -
  3. ne: not equal
  4. -
  5. ugt: unsigned greater than
  6. -
  7. uge: unsigned greater or equal
  8. -
  9. ult: unsigned less than
  10. -
  11. ule: unsigned less or equal
  12. -
  13. sgt: signed greater than
  14. -
  15. sge: signed greater or equal
  16. -
  17. slt: signed less than
  18. -
  19. sle: signed less or equal
  20. -
- -

The remaining two arguments must be integer or - pointer or integer vector - typed. They must also be identical types.

- -
Semantics:
-

The 'icmp' instruction compares op1 and op2 according to the - condition code given as cond. The comparison performed always yields - either an i1 or vector of i1 - result, as follows:

- -
    -
  1. eq: yields true if the operands are equal, - false otherwise. No sign interpretation is necessary or - performed.
  2. - -
  3. ne: yields true if the operands are unequal, - false otherwise. No sign interpretation is necessary or - performed.
  4. - -
  5. ugt: interprets the operands as unsigned values and yields - true if op1 is greater than op2.
  6. - -
  7. uge: interprets the operands as unsigned values and yields - true if op1 is greater than or equal - to op2.
  8. - -
  9. ult: interprets the operands as unsigned values and yields - true if op1 is less than op2.
  10. - -
  11. ule: interprets the operands as unsigned values and yields - true if op1 is less than or equal to op2.
  12. - -
  13. sgt: interprets the operands as signed values and yields - true if op1 is greater than op2.
  14. - -
  15. sge: interprets the operands as signed values and yields - true if op1 is greater than or equal - to op2.
  16. - -
  17. slt: interprets the operands as signed values and yields - true if op1 is less than op2.
  18. - -
  19. sle: interprets the operands as signed values and yields - true if op1 is less than or equal to op2.
  20. -
- -

If the operands are pointer typed, the pointer - values are compared as if they were integers.

- -

If the operands are integer vectors, then they are compared element by - element. The result is an i1 vector with the same number of elements - as the values being compared. Otherwise, the result is an i1.

- -
Example:
-
-  <result> = icmp eq i32 4, 5          ; yields: result=false
-  <result> = icmp ne float* %X, %X     ; yields: result=false
-  <result> = icmp ult i16  4, 5        ; yields: result=true
-  <result> = icmp sgt i16  4, 5        ; yields: result=false
-  <result> = icmp ule i16 -4, 5        ; yields: result=false
-  <result> = icmp sge i16  4, 5        ; yields: result=false
-
- -

Note that the code generator does not yet support vector types with - the icmp instruction.

- -
- - -

- 'fcmp' Instruction -

- -
- -
Syntax:
-
-  <result> = fcmp <cond> <ty> <op1>, <op2>     ; yields {i1} or {<N x i1>}:result
-
- -
Overview:
-

The 'fcmp' instruction returns a boolean value or vector of boolean - values based on comparison of its operands.

- -

If the operands are floating point scalars, then the result type is a boolean -(i1).

- -

If the operands are floating point vectors, then the result type is a vector - of boolean with the same number of elements as the operands being - compared.

- -
Arguments:
-

The 'fcmp' instruction takes three operands. The first operand is - the condition code indicating the kind of comparison to perform. It is not a - value, just a keyword. The possible condition codes are:

- -
    -
  1. false: no comparison, always returns false
  2. -
  3. oeq: ordered and equal
  4. -
  5. ogt: ordered and greater than
  6. -
  7. oge: ordered and greater than or equal
  8. -
  9. olt: ordered and less than
  10. -
  11. ole: ordered and less than or equal
  12. -
  13. one: ordered and not equal
  14. -
  15. ord: ordered (no nans)
  16. -
  17. ueq: unordered or equal
  18. -
  19. ugt: unordered or greater than
  20. -
  21. uge: unordered or greater than or equal
  22. -
  23. ult: unordered or less than
  24. -
  25. ule: unordered or less than or equal
  26. -
  27. une: unordered or not equal
  28. -
  29. uno: unordered (either nans)
  30. -
  31. true: no comparison, always returns true
  32. -
- -

Ordered means that neither operand is a QNAN while - unordered means that either operand may be a QNAN.

- -

Each of the op1 and op2 arguments must be either - a floating point type or - a vector of floating point type. They must have - identical types.

- -
Semantics:
-

The 'fcmp' instruction compares op1 and op2 - according to the condition code given as cond. If the operands are - vectors, then the vectors are compared element by element. Each comparison - performed always yields an i1 result, as - follows:

- -
    -
  1. false: always yields false, regardless of operands.
  2. - -
  3. oeq: yields true if both operands are not a QNAN and - op1 is equal to op2.
  4. - -
  5. ogt: yields true if both operands are not a QNAN and - op1 is greater than op2.
  6. - -
  7. oge: yields true if both operands are not a QNAN and - op1 is greater than or equal to op2.
  8. - -
  9. olt: yields true if both operands are not a QNAN and - op1 is less than op2.
  10. - -
  11. ole: yields true if both operands are not a QNAN and - op1 is less than or equal to op2.
  12. - -
  13. one: yields true if both operands are not a QNAN and - op1 is not equal to op2.
  14. - -
  15. ord: yields true if both operands are not a QNAN.
  16. - -
  17. ueq: yields true if either operand is a QNAN or - op1 is equal to op2.
  18. - -
  19. ugt: yields true if either operand is a QNAN or - op1 is greater than op2.
  20. - -
  21. uge: yields true if either operand is a QNAN or - op1 is greater than or equal to op2.
  22. - -
  23. ult: yields true if either operand is a QNAN or - op1 is less than op2.
  24. - -
  25. ule: yields true if either operand is a QNAN or - op1 is less than or equal to op2.
  26. - -
  27. une: yields true if either operand is a QNAN or - op1 is not equal to op2.
  28. - -
  29. uno: yields true if either operand is a QNAN.
  30. - -
  31. true: always yields true, regardless of operands.
  32. -
- -
Example:
-
-  <result> = fcmp oeq float 4.0, 5.0    ; yields: result=false
-  <result> = fcmp one float 4.0, 5.0    ; yields: result=true
-  <result> = fcmp olt float 4.0, 5.0    ; yields: result=true
-  <result> = fcmp ueq double 1.0, 2.0   ; yields: result=false
-
- -

Note that the code generator does not yet support vector types with - the fcmp instruction.

- -
- - -

- 'phi' Instruction -

- -
- -
Syntax:
-
-  <result> = phi <ty> [ <val0>, <label0>], ...
-
- -
Overview:
-

The 'phi' instruction is used to implement the φ node in the - SSA graph representing the function.

- -
Arguments:
-

The type of the incoming values is specified with the first type field. After - this, the 'phi' instruction takes a list of pairs as arguments, with - one pair for each predecessor basic block of the current block. Only values - of first class type may be used as the value - arguments to the PHI node. Only labels may be used as the label - arguments.

- -

There must be no non-phi instructions between the start of a basic block and - the PHI instructions: i.e. PHI instructions must be first in a basic - block.

- -

For the purposes of the SSA form, the use of each incoming value is deemed to - occur on the edge from the corresponding predecessor block to the current - block (but after any definition of an 'invoke' instruction's return - value on the same edge).

- -
Semantics:
-

At runtime, the 'phi' instruction logically takes on the value - specified by the pair corresponding to the predecessor basic block that - executed just prior to the current block.

- -
Example:
-
-Loop:       ; Infinite loop that counts from 0 on up...
-  %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
-  %nextindvar = add i32 %indvar, 1
-  br label %Loop
-
- -
- - -

- 'select' Instruction -

- -
- -
Syntax:
-
-  <result> = select selty <cond>, <ty> <val1>, <ty> <val2>             ; yields ty
-
-  selty is either i1 or {<N x i1>}
-
- -
Overview:
-

The 'select' instruction is used to choose one value based on a - condition, without branching.

- - -
Arguments:
-

The 'select' instruction requires an 'i1' value or a vector of 'i1' - values indicating the condition, and two values of the - same first class type. If the val1/val2 are - vectors and the condition is a scalar, then entire vectors are selected, not - individual elements.

- -
Semantics:
-

If the condition is an i1 and it evaluates to 1, the instruction returns the - first value argument; otherwise, it returns the second value argument.

- -

If the condition is a vector of i1, then the value arguments must be vectors - of the same size, and the selection is done element by element.

- -
Example:
-
-  %X = select i1 true, i8 17, i8 42          ; yields i8:17
-
- -
- - -

- 'call' Instruction -

- -
- -
Syntax:
-
-  <result> = [tail] call [cconv] [ret attrs] <ty> [<fnty>*] <fnptrval>(<function args>) [fn attrs]
-
- -
Overview:
-

The 'call' instruction represents a simple function call.

- -
Arguments:
-

This instruction requires several arguments:

- -
    -
  1. The optional "tail" marker indicates that the callee function does not - access any allocas or varargs in the caller. Note that calls may be - marked "tail" even if they do not occur before - a ret instruction. If the "tail" marker is - present, the function call is eligible for tail call optimization, - but might not in fact be - optimized into a jump. The code generator may optimize calls marked - "tail" with either 1) automatic - sibling call optimization when the caller and callee have - matching signatures, or 2) forced tail call optimization when the - following extra requirements are met: -
      -
    • Caller and callee both have the calling - convention fastcc.
    • -
    • The call is in tail position (ret immediately follows call and ret - uses value of call or is void).
    • -
    • Option -tailcallopt is enabled, - or llvm::GuaranteedTailCallOpt is true.
    • -
    • Platform specific - constraints are met.
    • -
    -
  2. - -
  3. The optional "cconv" marker indicates which calling - convention the call should use. If none is specified, the call - defaults to using C calling conventions. The calling convention of the - call must match the calling convention of the target function, or else the - behavior is undefined.
  4. - -
  5. The optional Parameter Attributes list for - return values. Only 'zeroext', 'signext', and - 'inreg' attributes are valid here.
  6. - -
  7. 'ty': the type of the call instruction itself which is also the - type of the return value. Functions that return no value are marked - void.
  8. - -
  9. 'fnty': shall be the signature of the pointer to function value - being invoked. The argument types must match the types implied by this - signature. This type can be omitted if the function is not varargs and if - the function type does not return a pointer to a function.
  10. - -
  11. 'fnptrval': An LLVM value containing a pointer to a function to - be invoked. In most cases, this is a direct function invocation, but - indirect calls are just as possible, calling an arbitrary pointer - to function value.
  12. - -
  13. 'function args': argument list whose types match the function - signature argument types and parameter attributes. All arguments must be - of first class type. If the function - signature indicates the function accepts a variable number of arguments, - the extra arguments can be specified.
  14. - -
  15. The optional function attributes list. Only - 'noreturn', 'nounwind', 'readonly' and - 'readnone' attributes are valid here.
  16. -
- -
Semantics:
-

The 'call' instruction is used to cause control flow to transfer to - a specified function, with its incoming arguments bound to the specified - values. Upon a 'ret' instruction in the called - function, control flow continues with the instruction after the function - call, and the return value of the function is bound to the result - argument.

- -
Example:
-
-  %retval = call i32 @test(i32 %argc)
-  call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)        ; yields i32
-  %X = tail call i32 @foo()                                    ; yields i32
-  %Y = tail call fastcc i32 @foo()  ; yields i32
-  call void %foo(i8 97 signext)
-
-  %struct.A = type { i32, i8 }
-  %r = call %struct.A @foo()                        ; yields { i32, i8 }
-  %gr = extractvalue %struct.A %r, 0                ; yields i32
-  %gr1 = extractvalue %struct.A %r, 1               ; yields i8
-  %Z = call void @foo() noreturn                    ; indicates that %foo never returns normally
-  %ZZ = call zeroext i32 @bar()                     ; Return value is zero extended
-
- -

LLVM treats calls to some functions with names and arguments that match the -standard C99 library as being the C99 library functions, and may perform -optimizations or generate code for them under that assumption. This is -something we'd like to change in the future to provide better support for -freestanding environments and non-C-based languages.

- -
- - -

- 'va_arg' Instruction -

- -
- -
Syntax:
-
-  <resultval> = va_arg <va_list*> <arglist>, <argty>
-
- -
Overview:
-

The 'va_arg' instruction is used to access arguments passed through - the "variable argument" area of a function call. It is used to implement the - va_arg macro in C.

- -
Arguments:
-

This instruction takes a va_list* value and the type of the - argument. It returns a value of the specified argument type and increments - the va_list to point to the next argument. The actual type - of va_list is target specific.

- -
Semantics:
-

The 'va_arg' instruction loads an argument of the specified type - from the specified va_list and causes the va_list to point - to the next argument. For more information, see the variable argument - handling Intrinsic Functions.

- -

It is legal for this instruction to be called in a function which does not - take a variable number of arguments, for example, the vfprintf - function.

- -

va_arg is an LLVM instruction instead of - an intrinsic function because it takes a type as an - argument.

- -
Example:
-

See the variable argument processing section.

- -

Note that the code generator does not yet fully support va_arg on many - targets. Also, it does not currently support va_arg with aggregate types on - any target.

- -
- - -

- 'landingpad' Instruction -

- -
- -
Syntax:
-
-  <resultval> = landingpad <resultty> personality <type> <pers_fn> <clause>+
-  <resultval> = landingpad <resultty> personality <type> <pers_fn> cleanup <clause>*
-
-  <clause> := catch <type> <value>
-  <clause> := filter <array constant type> <array constant>
-
- -
Overview:
-

The 'landingpad' instruction is used by - LLVM's exception handling - system to specify that a basic block is a landing pad — one where - the exception lands, and corresponds to the code found in the - catch portion of a try/catch sequence. It - defines values supplied by the personality function (pers_fn) upon - re-entry to the function. The resultval has the - type resultty.

- -
Arguments:
-

This instruction takes a pers_fn value. This is the personality - function associated with the unwinding mechanism. The optional - cleanup flag indicates that the landing pad block is a cleanup.

- -

A clause begins with the clause type — catch - or filter — and contains the global variable representing the - "type" that may be caught or filtered respectively. Unlike the - catch clause, the filter clause takes an array constant as - its argument. Use "[0 x i8**] undef" for a filter which cannot - throw. The 'landingpad' instruction must contain at least - one clause or the cleanup flag.

- -
Semantics:
-

The 'landingpad' instruction defines the values which are set by the - personality function (pers_fn) upon re-entry to the function, and - therefore the "result type" of the landingpad instruction. As with - calling conventions, how the personality function results are represented in - LLVM IR is target specific.

- -

The clauses are applied in order from top to bottom. If two - landingpad instructions are merged together through inlining, the - clauses from the calling function are appended to the list of clauses. - When the call stack is being unwound due to an exception being thrown, the - exception is compared against each clause in turn. If it doesn't - match any of the clauses, and the cleanup flag is not set, then - unwinding continues further up the call stack.

- -

The landingpad instruction has several restrictions:

- -
    -
  • A landing pad block is a basic block which is the unwind destination of an - 'invoke' instruction.
  • -
  • A landing pad block must have a 'landingpad' instruction as its - first non-PHI instruction.
  • -
  • There can be only one 'landingpad' instruction within the landing - pad block.
  • -
  • A basic block that is not a landing pad block may not include a - 'landingpad' instruction.
  • -
  • All 'landingpad' instructions in a function must have the same - personality function.
  • -
- -
Example:
-
-  ;; A landing pad which can catch an integer.
-  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-           catch i8** @_ZTIi
-  ;; A landing pad that is a cleanup.
-  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-           cleanup
-  ;; A landing pad which can catch an integer and can only throw a double.
-  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-           catch i8** @_ZTIi
-           filter [1 x i8**] [@_ZTId]
-
- -
- -
- -
- - -

Intrinsic Functions

- - -
- -

LLVM supports the notion of an "intrinsic function". These functions have - well known names and semantics and are required to follow certain - restrictions. Overall, these intrinsics represent an extension mechanism for - the LLVM language that does not require changing all of the transformations - in LLVM when adding to the language (or the bitcode reader/writer, the - parser, etc...).

- -

Intrinsic function names must all start with an "llvm." prefix. This - prefix is reserved in LLVM for intrinsic names; thus, function names may not - begin with this prefix. Intrinsic functions must always be external - functions: you cannot define the body of intrinsic functions. Intrinsic - functions may only be used in call or invoke instructions: it is illegal to - take the address of an intrinsic function. Additionally, because intrinsic - functions are part of the LLVM language, it is required if any are added that - they be documented here.

- -

Some intrinsic functions can be overloaded, i.e., the intrinsic represents a - family of functions that perform the same operation but on different data - types. Because LLVM can represent over 8 million different integer types, - overloading is used commonly to allow an intrinsic function to operate on any - integer type. One or more of the argument types or the result type can be - overloaded to accept any integer type. Argument types may also be defined as - exactly matching a previous argument's type or the result type. This allows - an intrinsic function which accepts multiple arguments, but needs all of them - to be of the same type, to only be overloaded with respect to a single - argument or the result.

- -

Overloaded intrinsics will have the names of their overloaded argument types - encoded into their function names, each preceded by a period. Only those types - which are overloaded result in a name suffix. Arguments whose type is matched - against another type do not. For example, the llvm.ctpop function - can take an integer of any width and returns an integer of exactly the same - integer width. This leads to a family of functions such as - i8 @llvm.ctpop.i8(i8 %val) and i29 @llvm.ctpop.i29(i29 - %val). Only one type, the return type, is overloaded, and only one type - suffix is required. Because the argument's type is matched against the return - type, it does not require its own name suffix.

- -

To learn how to add an intrinsic function, please see the - Extending LLVM Guide.

- - -

- Variable Argument Handling Intrinsics -

- -
- -

Variable argument support is defined in LLVM with - the va_arg instruction and these three - intrinsic functions. These functions are related to the similarly named - macros defined in the <stdarg.h> header file.

- -

All of these functions operate on arguments that use a target-specific value - type "va_list". The LLVM assembly language reference manual does - not define what this type is, so all transformations should be prepared to - handle these functions regardless of the type used.

- -

This example shows how the va_arg - instruction and the variable argument handling intrinsic functions are - used.

- -
-define i32 @test(i32 %X, ...) {
-  ; Initialize variable argument processing
-  %ap = alloca i8*
-  %ap2 = bitcast i8** %ap to i8*
-  call void @llvm.va_start(i8* %ap2)
-
-  ; Read a single integer argument
-  %tmp = va_arg i8** %ap, i32
-
-  ; Demonstrate usage of llvm.va_copy and llvm.va_end
-  %aq = alloca i8*
-  %aq2 = bitcast i8** %aq to i8*
-  call void @llvm.va_copy(i8* %aq2, i8* %ap2)
-  call void @llvm.va_end(i8* %aq2)
-
-  ; Stop processing of arguments.
-  call void @llvm.va_end(i8* %ap2)
-  ret i32 %tmp
-}
-
-declare void @llvm.va_start(i8*)
-declare void @llvm.va_copy(i8*, i8*)
-declare void @llvm.va_end(i8*)
-
- - -

- 'llvm.va_start' Intrinsic -

- - -
- -
Syntax:
-
-  declare void @llvm.va_start(i8* <arglist>)
-
- -
Overview:
-

The 'llvm.va_start' intrinsic initializes *<arglist> - for subsequent use by va_arg.

- -
Arguments:
-

The argument is a pointer to a va_list element to initialize.

- -
Semantics:
-

The 'llvm.va_start' intrinsic works just like the va_start - macro available in C. In a target-dependent way, it initializes - the va_list element to which the argument points, so that the next - call to va_arg will produce the first variable argument passed to - the function. Unlike the C va_start macro, this intrinsic does not - need to know the last argument of the function as the compiler can figure - that out.

- -
- - -

- 'llvm.va_end' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.va_end(i8* <arglist>)
-
- -
Overview:
-

The 'llvm.va_end' intrinsic destroys *<arglist>, - which has been initialized previously - with llvm.va_start - or llvm.va_copy.

- -
Arguments:
-

The argument is a pointer to a va_list to destroy.

- -
Semantics:
-

The 'llvm.va_end' intrinsic works just like the va_end - macro available in C. In a target-dependent way, it destroys - the va_list element to which the argument points. Calls - to llvm.va_start - and llvm.va_copy must be matched exactly - with calls to llvm.va_end.

- -
- - -

- 'llvm.va_copy' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.va_copy(i8* <destarglist>, i8* <srcarglist>)
-
- -
Overview:
-

The 'llvm.va_copy' intrinsic copies the current argument position - from the source argument list to the destination argument list.

- -
Arguments:
-

The first argument is a pointer to a va_list element to initialize. - The second argument is a pointer to a va_list element to copy - from.

- -
Semantics:
-

The 'llvm.va_copy' intrinsic works just like the va_copy - macro available in C. In a target-dependent way, it copies the - source va_list element into the destination va_list - element. This intrinsic is necessary because - the llvm.va_start intrinsic may be - arbitrarily complex and require, for example, memory allocation.

- -
- -
- - -

- Accurate Garbage Collection Intrinsics -

- -
- -

LLVM support for Accurate Garbage -Collection (GC) requires the implementation and generation of these -intrinsics. These intrinsics allow identification of GC -roots on the stack, as well as garbage collector implementations that -require read and write -barriers. Front-ends for type-safe garbage collected languages should generate -these intrinsics to make use of the LLVM garbage collectors. For more details, -see Accurate Garbage Collection with -LLVM.

- -

The garbage collection intrinsics only operate on objects in the generic - address space (address space zero).

- - -

- 'llvm.gcroot' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
-
- -
Overview:
-

The 'llvm.gcroot' intrinsic declares the existence of a GC root to - the code generator, and allows some metadata to be associated with it.

- -
Arguments:
-

The first argument specifies the address of a stack object that contains the - root pointer. The second pointer (which must be either a constant or a - global value address) contains the meta-data to be associated with the - root.

- -
Semantics:
-

At runtime, a call to this intrinsic stores a null pointer into the "ptrloc" - location. At compile-time, the code generator generates information to allow - the runtime to find the pointer at GC safe points. The 'llvm.gcroot' - intrinsic may only be used in a function which specifies a GC - algorithm.

- -
- - -

- 'llvm.gcread' Intrinsic -

- -
- -
Syntax:
-
-  declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
-
- -
Overview:
-

The 'llvm.gcread' intrinsic identifies reads of references from heap - locations, allowing garbage collector implementations that require read - barriers.

- -
Arguments:
-

The second argument is the address to read from, which should be an address - allocated from the garbage collector. The first argument is a pointer to the - start of the referenced object, if needed by the language runtime (otherwise - null).

- -
Semantics:
-

The 'llvm.gcread' intrinsic has the same semantics as a load - instruction, but may be replaced with substantially more complex code by the - garbage collector runtime, as needed. The 'llvm.gcread' intrinsic - may only be used in a function which specifies a GC - algorithm.

- -
- - -

- 'llvm.gcwrite' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
-
- -
Overview:
-

The 'llvm.gcwrite' intrinsic identifies writes of references to heap - locations, allowing garbage collector implementations that require write - barriers (such as generational or reference counting collectors).

- -
Arguments:
-

The first argument is the reference to store, the second is the start of the - object to store it to, and the third is the address of the field of Obj to - store to. If the runtime does not require a pointer to the object, Obj may - be null.

- -
Semantics:
-

The 'llvm.gcwrite' intrinsic has the same semantics as a store - instruction, but may be replaced with substantially more complex code by the - garbage collector runtime, as needed. The 'llvm.gcwrite' intrinsic - may only be used in a function which specifies a GC - algorithm.

- -
- -
- - -

- Code Generator Intrinsics -

- -
- -

These intrinsics are provided by LLVM to expose special features that may - only be implemented with code generator support.

- - -

- 'llvm.returnaddress' Intrinsic -

- -
- -
Syntax:
-
-  declare i8* @llvm.returnaddress(i32 <level>)
-
- -
Overview:
-

The 'llvm.returnaddress' intrinsic attempts to compute a - target-specific value indicating the return address of the current function - or one of its callers.

- -
Arguments:
-

The argument to this intrinsic indicates which function to return the address - for. Zero indicates the calling function, one indicates its caller, etc. - The argument is required to be a constant integer value.

- -
Semantics:
-

The 'llvm.returnaddress' intrinsic either returns a pointer - indicating the return address of the specified call frame, or zero if it - cannot be identified. The value returned by this intrinsic is likely to be - incorrect or 0 for arguments other than zero, so it should only be used for - debugging purposes.

- -

Note that calling this intrinsic does not prevent function inlining or other - aggressive transformations, so the value returned may not be that of the - obvious source-language caller.

- -
- - -

- 'llvm.frameaddress' Intrinsic -

- -
- -
Syntax:
-
-  declare i8* @llvm.frameaddress(i32 <level>)
-
- -
Overview:
-

The 'llvm.frameaddress' intrinsic attempts to return the - target-specific frame pointer value for the specified stack frame.

- -
Arguments:
-

The argument to this intrinsic indicates which function to return the frame - pointer for. Zero indicates the calling function, one indicates its caller, - etc. The argument is required to be a constant integer value.

- -
Semantics:
-

The 'llvm.frameaddress' intrinsic either returns a pointer - indicating the frame address of the specified call frame, or zero if it - cannot be identified. The value returned by this intrinsic is likely to be - incorrect or 0 for arguments other than zero, so it should only be used for - debugging purposes.

- -

Note that calling this intrinsic does not prevent function inlining or other - aggressive transformations, so the value returned may not be that of the - obvious source-language caller.

- -
- - -

- 'llvm.stacksave' Intrinsic -

- -
- -
Syntax:
-
-  declare i8* @llvm.stacksave()
-
- -
Overview:
-

The 'llvm.stacksave' intrinsic is used to remember the current state - of the function stack, for use - with llvm.stackrestore. This is - useful for implementing language features like scoped automatic variable - sized arrays in C99.

- -
Semantics:
-

This intrinsic returns an opaque pointer value that can be passed - to llvm.stackrestore. When - an llvm.stackrestore intrinsic is executed with a value saved - from llvm.stacksave, it effectively restores the state of the stack - to the state it was in when the llvm.stacksave intrinsic executed. - In practice, this pops any alloca blocks from the - stack that were allocated after the llvm.stacksave was executed.

- -
- - -

- 'llvm.stackrestore' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.stackrestore(i8* %ptr)
-
- -
Overview:
-

The 'llvm.stackrestore' intrinsic is used to restore the state of - the function stack to the state it was in when the - corresponding llvm.stacksave intrinsic - executed. This is useful for implementing language features like scoped - automatic variable sized arrays in C99.

- -
Semantics:
-

See the description - for llvm.stacksave.

- -
- - -

- 'llvm.prefetch' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>, i32 <cache type>)
-
- -
Overview:
-

The 'llvm.prefetch' intrinsic is a hint to the code generator to - insert a prefetch instruction if supported; otherwise, it is a noop. - Prefetches have no effect on the behavior of the program but can change its - performance characteristics.

- -
Arguments:
-

address is the address to be prefetched, rw is the - specifier determining if the fetch should be for a read (0) or write (1), - and locality is a temporal locality specifier ranging from (0) - no - locality, to (3) - extremely local keep in cache. The cache type - specifies whether the prefetch is performed on the data (1) or instruction (0) - cache. The rw, locality and cache type arguments - must be constant integers.

- -
Semantics:
-

This intrinsic does not modify the behavior of the program. In particular, - prefetches cannot trap and do not produce a value. On targets that support - this intrinsic, the prefetch can provide hints to the processor cache for - better performance.

- -
- - -

- 'llvm.pcmarker' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.pcmarker(i32 <id>)
-
- -
Overview:
-

The 'llvm.pcmarker' intrinsic is a method to export a Program - Counter (PC) in a region of code to simulators and other tools. The method - is target specific, but it is expected that the marker will use exported - symbols to transmit the PC of the marker. The marker makes no guarantees - that it will remain with any specific instruction after optimizations. It is - possible that the presence of a marker will inhibit optimizations. The - intended use is to be inserted after optimizations to allow correlations of - simulation runs.

- -
Arguments:
-

id is a numerical id identifying the marker.

- -
Semantics:
-

This intrinsic does not modify the behavior of the program. Backends that do - not support this intrinsic may ignore it.

- -
- - -

- 'llvm.readcyclecounter' Intrinsic -

- -
- -
Syntax:
-
-  declare i64 @llvm.readcyclecounter()
-
- -
Overview:
-

The 'llvm.readcyclecounter' intrinsic provides access to the cycle - counter register (or similar low latency, high accuracy clocks) on those - targets that support it. On X86, it should map to RDTSC. On Alpha, it - should map to RPCC. As the backing counters overflow quickly (on the order - of 9 seconds on alpha), this should only be used for small timings.

- -
Semantics:
-

When directly supported, reading the cycle counter should not modify any - memory. Implementations are allowed to either return an application specific - value or a system wide value. On backends without support, this is lowered - to a constant 0.

- -
- -
- - -

- Standard C Library Intrinsics -

- -
- -

LLVM provides intrinsics for a few important standard C library functions. - These intrinsics allow source-language front-ends to pass information about - the alignment of the pointer arguments to the code generator, providing - opportunity for more efficient code generation.

- - -

- 'llvm.memcpy' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.memcpy on any - integer bit width and for different address spaces. Not all targets support - all bit widths however.

- -
-  declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
-                                          i32 <len>, i32 <align>, i1 <isvolatile>)
-  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
-                                          i64 <len>, i32 <align>, i1 <isvolatile>)
-
- -
Overview:
-

The 'llvm.memcpy.*' intrinsics copy a block of memory from the - source location to the destination location.

- -

Note that, unlike the standard libc function, the llvm.memcpy.* - intrinsics do not return a value, take extra alignment/isvolatile arguments - and the pointers can be in specified address spaces.

- -
Arguments:
- -

The first argument is a pointer to the destination, the second is a pointer - to the source. The third argument is an integer argument specifying the - number of bytes to copy, the fourth argument is the alignment of the - source and destination locations, and the fifth is a boolean indicating a - volatile access.

- -

If the call to this intrinsic has an alignment value that is not 0 or 1, - then the caller guarantees that both the source and destination pointers are - aligned to that boundary.

- -

If the isvolatile parameter is true, the - llvm.memcpy call is a volatile operation. - The detailed access behavior is not very cleanly specified and it is unwise - to depend on it.

- -
Semantics:
- -

The 'llvm.memcpy.*' intrinsics copy a block of memory from the - source location to the destination location, which are not allowed to - overlap. It copies "len" bytes of memory over. If the argument is known to - be aligned to some boundary, this can be specified as the fourth argument, - otherwise it should be set to 0 or 1.

- -
- - -

- 'llvm.memmove' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.memmove on any integer bit - width and for different address spaces. Not all targets support all bit - widths however.

- -
-  declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
-                                           i32 <len>, i32 <align>, i1 <isvolatile>)
-  declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
-                                           i64 <len>, i32 <align>, i1 <isvolatile>)
-
- -
Overview:
-

The 'llvm.memmove.*' intrinsics move a block of memory from the - source location to the destination location. It is similar to the - 'llvm.memcpy' intrinsic but allows the two memory locations to - overlap.

- -

Note that, unlike the standard libc function, the llvm.memmove.* - intrinsics do not return a value, take extra alignment/isvolatile arguments - and the pointers can be in specified address spaces.

- -
Arguments:
- -

The first argument is a pointer to the destination, the second is a pointer - to the source. The third argument is an integer argument specifying the - number of bytes to copy, the fourth argument is the alignment of the - source and destination locations, and the fifth is a boolean indicating a - volatile access.

- -

If the call to this intrinsic has an alignment value that is not 0 or 1, - then the caller guarantees that the source and destination pointers are - aligned to that boundary.

- -

If the isvolatile parameter is true, the - llvm.memmove call is a volatile operation. - The detailed access behavior is not very cleanly specified and it is unwise - to depend on it.

- -
Semantics:
- -

The 'llvm.memmove.*' intrinsics copy a block of memory from the - source location to the destination location, which may overlap. It copies - "len" bytes of memory over. If the argument is known to be aligned to some - boundary, this can be specified as the fourth argument, otherwise it should - be set to 0 or 1.

- -
- - -

- 'llvm.memset.*' Intrinsics -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.memset on any integer bit - width and for different address spaces. However, not all targets support all - bit widths.

- -
-  declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
-                                     i32 <len>, i32 <align>, i1 <isvolatile>)
-  declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
-                                     i64 <len>, i32 <align>, i1 <isvolatile>)
-
- -
Overview:
-

The 'llvm.memset.*' intrinsics fill a block of memory with a - particular byte value.

- -

Note that, unlike the standard libc function, the llvm.memset - intrinsic does not return a value and takes extra alignment/volatile - arguments. Also, the destination can be in an arbitrary address space.

- -
Arguments:
-

The first argument is a pointer to the destination to fill, the second is the - byte value with which to fill it, the third argument is an integer argument - specifying the number of bytes to fill, the fourth argument is the known - alignment of the destination location, and the fifth is a boolean indicating - a volatile access.

- -

If the call to this intrinsic has an alignment value that is not 0 or 1, - then the caller guarantees that the destination pointer is aligned to that - boundary.

- -

If the isvolatile parameter is true, the - llvm.memset call is a volatile operation. - The detailed access behavior is not very cleanly specified and it is unwise - to depend on it.

- -
Semantics:
-

The 'llvm.memset.*' intrinsics fill "len" bytes of memory starting - at the destination location. If the argument is known to be aligned to some - boundary, this can be specified as the fourth argument, otherwise it should - be set to 0 or 1.

- -
- - -

- 'llvm.sqrt.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.sqrt on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.sqrt.f32(float %Val)
-  declare double    @llvm.sqrt.f64(double %Val)
-  declare x86_fp80  @llvm.sqrt.f80(x86_fp80 %Val)
-  declare fp128     @llvm.sqrt.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val)
-
- -
Overview:
-

The 'llvm.sqrt' intrinsics return the sqrt of the specified operand, - returning the same value as the libm 'sqrt' functions would. - Unlike sqrt in libm, however, llvm.sqrt has undefined - behavior for negative numbers other than -0.0 (which allows for better - optimization, because there is no need to worry about errno being - set). llvm.sqrt(-0.0) is defined to return -0.0 like IEEE sqrt.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the sqrt of the specified operand if it is a - nonnegative floating point number.

- -
- - -

- 'llvm.powi.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.powi on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.powi.f32(float  %Val, i32 %power)
-  declare double    @llvm.powi.f64(double %Val, i32 %power)
-  declare x86_fp80  @llvm.powi.f80(x86_fp80  %Val, i32 %power)
-  declare fp128     @llvm.powi.f128(fp128 %Val, i32 %power)
-  declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128  %Val, i32 %power)
-
- -
Overview:
-

The 'llvm.powi.*' intrinsics return the first operand raised to the - specified (positive or negative) power. The order of evaluation of - multiplications is not defined. When a vector of floating point type is - used, the second argument remains a scalar integer value.

- -
Arguments:
-

The second argument is an integer power, and the first is a value to raise to - that power.

- -
Semantics:
-

This function returns the first value raised to the second power with an - unspecified sequence of rounding operations.

- -
- - -

- 'llvm.sin.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.sin on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.sin.f32(float  %Val)
-  declare double    @llvm.sin.f64(double %Val)
-  declare x86_fp80  @llvm.sin.f80(x86_fp80  %Val)
-  declare fp128     @llvm.sin.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128  %Val)
-
- -
Overview:
-

The 'llvm.sin.*' intrinsics return the sine of the operand.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the sine of the specified operand, returning the same - values as the libm sin functions would, and handles error conditions - in the same way.

- -
- - -

- 'llvm.cos.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.cos on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.cos.f32(float  %Val)
-  declare double    @llvm.cos.f64(double %Val)
-  declare x86_fp80  @llvm.cos.f80(x86_fp80  %Val)
-  declare fp128     @llvm.cos.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128  %Val)
-
- -
Overview:
-

The 'llvm.cos.*' intrinsics return the cosine of the operand.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the cosine of the specified operand, returning the same - values as the libm cos functions would, and handles error conditions - in the same way.

- -
- - -

- 'llvm.pow.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.pow on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.pow.f32(float  %Val, float %Power)
-  declare double    @llvm.pow.f64(double %Val, double %Power)
-  declare x86_fp80  @llvm.pow.f80(x86_fp80  %Val, x86_fp80 %Power)
-  declare fp128     @llvm.pow.f128(fp128 %Val, fp128 %Power)
-  declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128  %Val, ppc_fp128 %Power)
-
- -
Overview:
-

The 'llvm.pow.*' intrinsics return the first operand raised to the - specified (positive or negative) power.

- -
Arguments:
-

The second argument is a floating point power, and the first is a value to - raise to that power.

- -
Semantics:
-

This function returns the first value raised to the second power, returning - the same values as the libm pow functions would, and handles error - conditions in the same way.

- -
- - -

- 'llvm.exp.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.exp on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.exp.f32(float  %Val)
-  declare double    @llvm.exp.f64(double %Val)
-  declare x86_fp80  @llvm.exp.f80(x86_fp80  %Val)
-  declare fp128     @llvm.exp.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.exp.ppcf128(ppc_fp128  %Val)
-
- -
Overview:
-

The 'llvm.exp.*' intrinsics perform the exp function.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the same values as the libm exp functions - would, and handles error conditions in the same way.

- -
- - -

- 'llvm.log.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.log on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.log.f32(float  %Val)
-  declare double    @llvm.log.f64(double %Val)
-  declare x86_fp80  @llvm.log.f80(x86_fp80  %Val)
-  declare fp128     @llvm.log.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.log.ppcf128(ppc_fp128  %Val)
-
- -
Overview:
-

The 'llvm.log.*' intrinsics perform the log function.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the same values as the libm log functions - would, and handles error conditions in the same way.

- -
- - -

- 'llvm.fma.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.fma on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.fma.f32(float  %a, float  %b, float  %c)
-  declare double    @llvm.fma.f64(double %a, double %b, double %c)
-  declare x86_fp80  @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
-  declare fp128     @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
-  declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
-
- -
Overview:
-

The 'llvm.fma.*' intrinsics perform the fused multiply-add - operation.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the same values as the libm fma functions - would.

- -
- - -

- 'llvm.fabs.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.fabs on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.fabs.f32(float  %Val)
-  declare double    @llvm.fabs.f64(double %Val)
-  declare x86_fp80  @llvm.fabs.f80(x86_fp80  %Val)
-  declare fp128     @llvm.fabs.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128  %Val)
-
- -
Overview:
-

The 'llvm.fabs.*' intrinsics return the absolute value of - the operand.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the same values as the libm fabs functions - would, and handles error conditions in the same way.

- -
- - -

- 'llvm.floor.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.floor on any - floating point or vector of floating point type. Not all targets support all - types however.

- -
-  declare float     @llvm.floor.f32(float  %Val)
-  declare double    @llvm.floor.f64(double %Val)
-  declare x86_fp80  @llvm.floor.f80(x86_fp80  %Val)
-  declare fp128     @llvm.floor.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128  %Val)
-
- -
Overview:
-

The 'llvm.floor.*' intrinsics return the floor of - the operand.

- -
Arguments:
-

The argument and return value are floating point numbers of the same - type.

- -
Semantics:
-

This function returns the same values as the libm floor functions - would, and handles error conditions in the same way.

- -
- -
- - -

- Bit Manipulation Intrinsics -

- -
- -

LLVM provides intrinsics for a few important bit manipulation operations. - These allow efficient code generation for some algorithms.

- - -

- 'llvm.bswap.*' Intrinsics -

- -
- -
Syntax:
-

This is an overloaded intrinsic function. You can use bswap on any integer - type that is an even number of bytes (i.e. BitWidth % 16 == 0).

- -
-  declare i16 @llvm.bswap.i16(i16 <id>)
-  declare i32 @llvm.bswap.i32(i32 <id>)
-  declare i64 @llvm.bswap.i64(i64 <id>)
-
- -
Overview:
-

The 'llvm.bswap' family of intrinsics is used to byte swap integer - values with an even number of bytes (positive multiple of 16 bits). These - are useful for performing operations on data that is not in the target's - native byte order.

- -
Semantics:
-

The llvm.bswap.i16 intrinsic returns an i16 value that has the high - and low byte of the input i16 swapped. Similarly, - the llvm.bswap.i32 intrinsic returns an i32 value that has the four - bytes of the input i32 swapped, so that if the input bytes are numbered 0, 1, - 2, 3 then the returned i32 will have its bytes in 3, 2, 1, 0 order. - The llvm.bswap.i48, llvm.bswap.i64 and other intrinsics - extend this concept to additional even-byte lengths (6 bytes, 8 bytes and - more, respectively).

- -
- - -

- 'llvm.ctpop.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit - width, or on any vector with integer elements. Not all targets support all - bit widths or vector types, however.

- -
-  declare i8 @llvm.ctpop.i8(i8  <src>)
-  declare i16 @llvm.ctpop.i16(i16 <src>)
-  declare i32 @llvm.ctpop.i32(i32 <src>)
-  declare i64 @llvm.ctpop.i64(i64 <src>)
-  declare i256 @llvm.ctpop.i256(i256 <src>)
-  declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32> <src>)
-
- -
Overview:
-

The 'llvm.ctpop' family of intrinsics counts the number of bits set - in a value.

- -
Arguments:
-

The only argument is the value to be counted. The argument may be of any - integer type, or a vector with integer elements. - The return type must match the argument type.

- -
Semantics:
-

The 'llvm.ctpop' intrinsic counts the 1's in a variable, or within each - element of a vector.

- -
- - -

- 'llvm.ctlz.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.ctlz on any - integer bit width, or any vector whose elements are integers. Not all - targets support all bit widths or vector types, however.

- -
-  declare i8   @llvm.ctlz.i8  (i8   <src>, i1 <is_zero_undef>)
-  declare i16  @llvm.ctlz.i16 (i16  <src>, i1 <is_zero_undef>)
-  declare i32  @llvm.ctlz.i32 (i32  <src>, i1 <is_zero_undef>)
-  declare i64  @llvm.ctlz.i64 (i64  <src>, i1 <is_zero_undef>)
-  declare i256 @llvm.ctlz.i256(i256 <src>, i1 <is_zero_undef>)
-  declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
-
- -
Overview:
-

The 'llvm.ctlz' family of intrinsic functions counts the number of - leading zeros in a variable.

- -
Arguments:
-

The first argument is the value to be counted. This argument may be of any - integer type, or a vector with integer element type. The return type - must match the first argument type.

- -

The second argument must be a constant and is a flag to indicate whether the - intrinsic should ensure that a zero as the first argument produces a defined - result. Historically some architectures did not provide a defined result for - zero values as efficiently, and many algorithms are now predicated on - avoiding zero-value inputs.

- -
Semantics:
-

The 'llvm.ctlz' intrinsic counts the leading (most significant) - zeros in a variable, or within each element of the vector. - If src == 0 then the result is the size in bits of the type of - src if is_zero_undef == 0 and undef otherwise. - For example, llvm.ctlz(i32 2) = 30.

- -
- - -

- 'llvm.cttz.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.cttz on any - integer bit width, or any vector of integer elements. Not all targets - support all bit widths or vector types, however.

- -
-  declare i8   @llvm.cttz.i8  (i8   <src>, i1 <is_zero_undef>)
-  declare i16  @llvm.cttz.i16 (i16  <src>, i1 <is_zero_undef>)
-  declare i32  @llvm.cttz.i32 (i32  <src>, i1 <is_zero_undef>)
-  declare i64  @llvm.cttz.i64 (i64  <src>, i1 <is_zero_undef>)
-  declare i256 @llvm.cttz.i256(i256 <src>, i1 <is_zero_undef>)
-  declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
-
- -
Overview:
-

The 'llvm.cttz' family of intrinsic functions counts the number of - trailing zeros.

- -
Arguments:
-

The first argument is the value to be counted. This argument may be of any - integer type, or a vector with integer element type. The return type - must match the first argument type.

- -

The second argument must be a constant and is a flag to indicate whether the - intrinsic should ensure that a zero as the first argument produces a defined - result. Historically some architectures did not provide a defined result for - zero values as efficiently, and many algorithms are now predicated on - avoiding zero-value inputs.

- -
Semantics:
-

The 'llvm.cttz' intrinsic counts the trailing (least significant) - zeros in a variable, or within each element of a vector. - If src == 0 then the result is the size in bits of the type of - src if is_zero_undef == 0 and undef otherwise. - For example, llvm.cttz(2) = 1.

- -
- -
- - -

- Arithmetic with Overflow Intrinsics -

- -
- -

LLVM provides intrinsics for some arithmetic with overflow operations.

- - -

- - 'llvm.sadd.with.overflow.*' Intrinsics - -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.sadd.with.overflow - on any integer bit width.

- -
-  declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
-
- -
Overview:
-

The 'llvm.sadd.with.overflow' family of intrinsic functions perform - a signed addition of the two arguments, and indicate whether an overflow - occurred during the signed summation.

- -
Arguments:
-

The arguments (%a and %b) and the first element of the result structure may - be of integer types of any bit width, but they must have the same bit - width. The second element of the result structure must be of - type i1. %a and %b are the two values that will - undergo signed addition.

- -
Semantics:
-

The 'llvm.sadd.with.overflow' family of intrinsic functions perform - a signed addition of the two variables. They return a structure — the - first element of which is the signed summation, and the second element of - which is a bit specifying if the signed summation resulted in an - overflow.

- -
Examples:
-
-  %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-
- -
- - -

- - 'llvm.uadd.with.overflow.*' Intrinsics - -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.uadd.with.overflow - on any integer bit width.

- -
-  declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
-
- -
Overview:
-

The 'llvm.uadd.with.overflow' family of intrinsic functions perform - an unsigned addition of the two arguments, and indicate whether a carry - occurred during the unsigned summation.

- -
Arguments:
-

The arguments (%a and %b) and the first element of the result structure may - be of integer types of any bit width, but they must have the same bit - width. The second element of the result structure must be of - type i1. %a and %b are the two values that will - undergo unsigned addition.

- -
Semantics:
-

The 'llvm.uadd.with.overflow' family of intrinsic functions perform - an unsigned addition of the two arguments. They return a structure — - the first element of which is the sum, and the second element of which is a - bit specifying if the unsigned summation resulted in a carry.

- -
Examples:
-
-  %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %carry, label %normal
-
- -
- - -

- - 'llvm.ssub.with.overflow.*' Intrinsics - -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.ssub.with.overflow - on any integer bit width.

- -
-  declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
-
- -
Overview:
-

The 'llvm.ssub.with.overflow' family of intrinsic functions perform - a signed subtraction of the two arguments, and indicate whether an overflow - occurred during the signed subtraction.

- -
Arguments:
-

The arguments (%a and %b) and the first element of the result structure may - be of integer types of any bit width, but they must have the same bit - width. The second element of the result structure must be of - type i1. %a and %b are the two values that will - undergo signed subtraction.

- -
Semantics:
-

The 'llvm.ssub.with.overflow' family of intrinsic functions perform - a signed subtraction of the two arguments. They return a structure — - the first element of which is the subtraction, and the second element of - which is a bit specifying if the signed subtraction resulted in an - overflow.

- -
Examples:
-
-  %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-
- -
- - -

- - 'llvm.usub.with.overflow.*' Intrinsics - -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.usub.with.overflow - on any integer bit width.

- -
-  declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
-
- -
Overview:
-

The 'llvm.usub.with.overflow' family of intrinsic functions perform - an unsigned subtraction of the two arguments, and indicate whether an - overflow occurred during the unsigned subtraction.

- -
Arguments:
-

The arguments (%a and %b) and the first element of the result structure may - be of integer types of any bit width, but they must have the same bit - width. The second element of the result structure must be of - type i1. %a and %b are the two values that will - undergo unsigned subtraction.

- -
Semantics:
-

The 'llvm.usub.with.overflow' family of intrinsic functions perform - an unsigned subtraction of the two arguments. They return a structure — - the first element of which is the subtraction, and the second element of - which is a bit specifying if the unsigned subtraction resulted in an - overflow.

- -
Examples:
-
-  %res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-
- -
- - -

- - 'llvm.smul.with.overflow.*' Intrinsics - -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.smul.with.overflow - on any integer bit width.

- -
-  declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
-
- -
Overview:
- -

The 'llvm.smul.with.overflow' family of intrinsic functions perform - a signed multiplication of the two arguments, and indicate whether an - overflow occurred during the signed multiplication.

- -
Arguments:
-

The arguments (%a and %b) and the first element of the result structure may - be of integer types of any bit width, but they must have the same bit - width. The second element of the result structure must be of - type i1. %a and %b are the two values that will - undergo signed multiplication.

- -
Semantics:
-

The 'llvm.smul.with.overflow' family of intrinsic functions perform - a signed multiplication of the two arguments. They return a structure — - the first element of which is the multiplication, and the second element of - which is a bit specifying if the signed multiplication resulted in an - overflow.

- -
Examples:
-
-  %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-
- -
- - -

- - 'llvm.umul.with.overflow.*' Intrinsics - -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use llvm.umul.with.overflow - on any integer bit width.

- -
-  declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
-
- -
Overview:
-

The 'llvm.umul.with.overflow' family of intrinsic functions perform - an unsigned multiplication of the two arguments, and indicate whether an - overflow occurred during the unsigned multiplication.

- -
Arguments:
-

The arguments (%a and %b) and the first element of the result structure may - be of integer types of any bit width, but they must have the same bit - width. The second element of the result structure must be of - type i1. %a and %b are the two values that will - undergo unsigned multiplication.

- -
Semantics:
-

The 'llvm.umul.with.overflow' family of intrinsic functions perform - an unsigned multiplication of the two arguments. They return a structure - — the first element of which is the multiplication, and the second - element of which is a bit specifying if the unsigned multiplication resulted - in an overflow.

- -
Examples:
-
-  %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-
- -
- -
- - -

- Specialised Arithmetic Intrinsics -

- - - -

- 'llvm.fmuladd.*' Intrinsic -

- -
- -
Syntax:
-
-  declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
-  declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
-
- -
Overview:
-

The 'llvm.fmuladd.*' intrinsic functions represent multiply-add -expressions that can be fused if the code generator determines that the fused -expression would be legal and efficient.

- -
Arguments:
-

The 'llvm.fmuladd.*' intrinsics each take three arguments: two -multiplicands, a and b, and an addend c.

- -
Semantics:
-

The expression:

-
-  %0 = call float @llvm.fmuladd.f32(%a, %b, %c)
-
-

is equivalent to the expression a * b + c, except that rounding will not be -performed between the multiplication and addition steps if the code generator -fuses the operations. Fusion is not guaranteed, even if the target platform -supports it. If a fused multiply-add is required the corresponding llvm.fma.* -intrinsic function should be used instead.

- -
Examples:
-
-  %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c
-
- -
- - -

- Half Precision Floating Point Intrinsics -

- -
- -

For most target platforms, half precision floating point is a storage-only - format. This means that it is - a dense encoding (in memory) but does not support computation in the - format.

- -

This means that code must first load the half-precision floating point - value as an i16, then convert it to float with llvm.convert.from.fp16. - Computation can then be performed on the float value (including extending to - double etc). To store the value back to memory, it is first converted to - float if needed, then converted to i16 with - llvm.convert.to.fp16, and then - stored as an i16 value.

- - -

- - 'llvm.convert.to.fp16' Intrinsic - -

- -
- -
Syntax:
-
-  declare i16 @llvm.convert.to.fp16(f32 %a)
-
- -
Overview:
-

The 'llvm.convert.to.fp16' intrinsic function performs - a conversion from single precision floating point format to half precision - floating point format.

- -
Arguments:
-

The intrinsic function takes a single argument - the value to be - converted.

- -
Semantics:
-

The 'llvm.convert.to.fp16' intrinsic function performs - a conversion from single precision floating point format to half precision - floating point format. The return value is an i16 which - contains the converted number.

- -
Examples:
-
-  %res = call i16 @llvm.convert.to.fp16(f32 %a)
-  store i16 %res, i16* @x, align 2
-
- -
- - -

- - 'llvm.convert.from.fp16' Intrinsic - -

- -
- -
Syntax:
-
-  declare f32 @llvm.convert.from.fp16(i16 %a)
-
- -
Overview:
-

The 'llvm.convert.from.fp16' intrinsic function performs - a conversion from half precision floating point format to single precision - floating point format.

- -
Arguments:
-

The intrinsic function takes a single argument - the value to be - converted.

- -
Semantics:
-

The 'llvm.convert.from.fp16' intrinsic function performs a - conversion from half precision floating point format to single - precision floating point format. The input half-float value is represented by - an i16 value.

- -
Examples:
-
-  %a = load i16* @x, align 2
-  %res = call f32 @llvm.convert.from.fp16(i16 %a)
-
- -
- -
- - -

- Debugger Intrinsics -

- -
- -

The LLVM debugger intrinsics (which all start with the llvm.dbg. - prefix) are described in - the LLVM Source - Level Debugging document.

- -
- - -

- Exception Handling Intrinsics -

- -
- -

The LLVM exception handling intrinsics (which all start with the - llvm.eh. prefix) are described in - the LLVM Exception - Handling document.

- -
- - -

- Trampoline Intrinsics -

- -
- -

These intrinsics make it possible to excise one parameter, marked with - the nest attribute, from a function. - The result is a callable - function pointer lacking the nest parameter - the caller does not need to - provide a value for it. Instead, the value to use is stored in advance in a - "trampoline", a block of memory usually allocated on the stack, which also - contains code to splice the nest value into the argument list. This is used - to implement the GCC nested function address extension.

- -

For example, if the function is - i32 f(i8* nest %c, i32 %x, i32 %y) then the resulting function - pointer has signature i32 (i32, i32)*. It can be created as - follows:

- -
-  %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
-  %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
-  call void @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
-  %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
-  %fp = bitcast i8* %p to i32 (i32, i32)*
-
- -

The call %val = call i32 %fp(i32 %x, i32 %y) is then equivalent - to %val = call i32 %f(i8* %nval, i32 %x, i32 %y).

- - -

- - 'llvm.init.trampoline' Intrinsic - -

- -
- -
Syntax:
-
-  declare void @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
-
- -
Overview:
-

This fills the memory pointed to by tramp with executable code, - turning it into a trampoline.

- -
Arguments:
-

The llvm.init.trampoline intrinsic takes three arguments, all - pointers. The tramp argument must point to a sufficiently large and - sufficiently aligned block of memory; this memory is written to by the - intrinsic. Note that the size and the alignment are target-specific - LLVM - currently provides no portable way of determining them, so a front-end that - generates this intrinsic needs to have some target-specific knowledge. - The func argument must hold a function bitcast to - an i8*.

- -
Semantics:
-

The block of memory pointed to by tramp is filled with target - dependent code, turning it into a function. Then tramp needs to be - passed to llvm.adjust.trampoline to get a pointer - which can be bitcast (to a new function) and - called. The new function's signature is the same as that of - func with any arguments marked with the nest attribute - removed. At most one such nest argument is allowed, and it must be of - pointer type. Calling the new function is equivalent to calling func - with the same argument list, but with nval used for the missing - nest argument. If, after calling llvm.init.trampoline, the - memory pointed to by tramp is modified, then the effect of any later call - to the returned function pointer is undefined.

-
- - -

- - 'llvm.adjust.trampoline' Intrinsic - -

- -
- -
Syntax:
-
-  declare i8* @llvm.adjust.trampoline(i8* <tramp>)
-
- -
Overview:
-

This performs any required machine-specific adjustment to the address of a - trampoline (passed as tramp).

- -
Arguments:
-

tramp must point to a block of memory which already has trampoline code - filled in by a previous call to llvm.init.trampoline - .

- -
Semantics:
-

On some architectures the address of the code to be executed needs to be - different to the address where the trampoline is actually stored. This - intrinsic returns the executable address corresponding to tramp - after performing the required machine specific adjustments. - The pointer returned can then be bitcast and - executed. -

- -
- -
- - -

- Memory Use Markers -

- -
- -

This class of intrinsics exists to provide information about the lifetime of memory - objects and ranges where variables are immutable.

- - -

- 'llvm.lifetime.start' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.lifetime.start(i64 <size>, i8* nocapture <ptr>)
-
- -
Overview:
-

The 'llvm.lifetime.start' intrinsic specifies the start of a memory - object's lifetime.

- -
Arguments:
-

The first argument is a constant integer representing the size of the - object, or -1 if it is variable sized. The second argument is a pointer to - the object.

- -
Semantics:
-

This intrinsic indicates that before this point in the code, the value of the - memory pointed to by ptr is dead. This means that it is known to - never be used and has an undefined value. A load from the pointer that - precedes this intrinsic can be replaced with - 'undef'.

- -
- - -

- 'llvm.lifetime.end' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.lifetime.end(i64 <size>, i8* nocapture <ptr>)
-
- -
Overview:
-

The 'llvm.lifetime.end' intrinsic specifies the end of a memory - object's lifetime.

- -
Arguments:
-

The first argument is a constant integer representing the size of the - object, or -1 if it is variable sized. The second argument is a pointer to - the object.

- -
Semantics:
-

This intrinsic indicates that after this point in the code, the value of the - memory pointed to by ptr is dead. This means that it is known to - never be used and has an undefined value. Any stores into the memory object - following this intrinsic may be removed as dead. - -

- - -

- 'llvm.invariant.start' Intrinsic -

- -
- -
Syntax:
-
-  declare {}* @llvm.invariant.start(i64 <size>, i8* nocapture <ptr>)
-
- -
Overview:
-

The 'llvm.invariant.start' intrinsic specifies that the contents of - a memory object will not change.

- -
Arguments:
-

The first argument is a constant integer representing the size of the - object, or -1 if it is variable sized. The second argument is a pointer to - the object.

- -
Semantics:
-

This intrinsic indicates that until an llvm.invariant.end that uses - the return value, the referenced memory location is constant and - unchanging.

- -
- - -

- 'llvm.invariant.end' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.invariant.end({}* <start>, i64 <size>, i8* nocapture <ptr>)
-
- -
Overview:
-

The 'llvm.invariant.end' intrinsic specifies that the contents of - a memory object are mutable.

- -
Arguments:
-

The first argument is the matching llvm.invariant.start intrinsic. - The second argument is a constant integer representing the size of the - object, or -1 if it is variable sized and the third argument is a pointer - to the object.

- -
Semantics:
-

This intrinsic indicates that the memory is mutable again.

- -
- -
- - -

- General Intrinsics -

- -
- -

This class of intrinsics is designed to be generic and has no specific - purpose.

- - -

- 'llvm.var.annotation' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32  <int>)
-
- -
Overview:
-

The 'llvm.var.annotation' intrinsic.

- -
Arguments:
-

The first argument is a pointer to a value, the second is a pointer to a - global string, the third is a pointer to a global string which is the source - file name, and the last argument is the line number.

- -
Semantics:
-

This intrinsic allows annotation of local variables with arbitrary strings. - This can be useful for special purpose optimizations that want to look for - these annotations. These have no other defined use; they are ignored by code - generation and optimization.

- -
- - -

- 'llvm.annotation.*' Intrinsic -

- -
- -
Syntax:
-

This is an overloaded intrinsic. You can use 'llvm.annotation' on - any integer bit width.

- -
-  declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32  <int>)
-  declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32  <int>)
-  declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32  <int>)
-  declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32  <int>)
-  declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32  <int>)
-
- -
Overview:
-

The 'llvm.annotation' intrinsic.

- -
Arguments:
-

The first argument is an integer value (result of some expression), the - second is a pointer to a global string, the third is a pointer to a global - string which is the source file name, and the last argument is the line - number. It returns the value of the first argument.

- -
Semantics:
-

This intrinsic allows annotations to be put on arbitrary expressions with - arbitrary strings. This can be useful for special purpose optimizations that - want to look for these annotations. These have no other defined use; they - are ignored by code generation and optimization.

- -
- - -

- 'llvm.trap' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.trap() noreturn nounwind
-
- -
Overview:
-

The 'llvm.trap' intrinsic.

- -
Arguments:
-

None.

- -
Semantics:
-

This intrinsic is lowered to the target dependent trap instruction. If the - target does not have a trap instruction, this intrinsic will be lowered to - a call of the abort() function.

- -
- - -

- 'llvm.debugtrap' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.debugtrap() nounwind
-
- -
Overview:
-

The 'llvm.debugtrap' intrinsic.

- -
Arguments:
-

None.

- -
Semantics:
-

This intrinsic is lowered to code which is intended to cause an execution - trap with the intention of requesting the attention of a debugger.

- -
- - -

- 'llvm.stackprotector' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.stackprotector(i8* <guard>, i8** <slot>)
-
- -
Overview:
-

The llvm.stackprotector intrinsic takes the guard and - stores it onto the stack at slot. The stack slot is adjusted to - ensure that it is placed on the stack before local variables.

- -
Arguments:
-

The llvm.stackprotector intrinsic requires two pointer - arguments. The first argument is the value loaded from the stack - guard @__stack_chk_guard. The second argument is an alloca - that has enough space to hold the value of the guard.

- -
Semantics:
-

This intrinsic causes the prologue/epilogue inserter to force the position of - the AllocaInst stack slot to be before local variables on the - stack. This is to ensure that if a local variable on the stack is - overwritten, it will destroy the value of the guard. When the function exits, - the guard on the stack is checked against the original guard. If they are - different, then the program aborts by calling the __stack_chk_fail() - function.

- -
- - -

- 'llvm.objectsize' Intrinsic -

- -
- -
Syntax:
-
-  declare i32 @llvm.objectsize.i32(i8* <object>, i1 <min>)
-  declare i64 @llvm.objectsize.i64(i8* <object>, i1 <min>)
-
- -
Overview:
-

The llvm.objectsize intrinsic is designed to provide information to - the optimizers to determine at compile time whether a) an operation (like - memcpy) will overflow a buffer that corresponds to an object, or b) that a - runtime check for overflow isn't necessary. An object in this context means - an allocation of a specific class, structure, array, or other object.

- -
Arguments:
-

The llvm.objectsize intrinsic takes two arguments. The first - argument is a pointer to or into the object. The second argument - is a boolean and determines whether llvm.objectsize returns 0 (if - true) or -1 (if false) when the object size is unknown. - The second argument only accepts constants.

- -
Semantics:
-

The llvm.objectsize intrinsic is lowered to a constant representing - the size of the object concerned. If the size cannot be determined at compile - time, llvm.objectsize returns i32/i64 -1 or 0 - (depending on the min argument).

- -
- -

- 'llvm.expect' Intrinsic -

- -
- -
Syntax:
-
-  declare i32 @llvm.expect.i32(i32 <val>, i32 <expected_val>)
-  declare i64 @llvm.expect.i64(i64 <val>, i64 <expected_val>)
-
- -
Overview:
-

The llvm.expect intrinsic provides information about expected (the - most probable) value of val, which can be used by optimizers.

- -
Arguments:
-

The llvm.expect intrinsic takes two arguments. The first - argument is a value. The second argument is the expected value; it must - be a constant, as variables are not allowed.

- -
Semantics:
-

This intrinsic is lowered to the val.

-
- - -

- 'llvm.donothing' Intrinsic -

- -
- -
Syntax:
-
-  declare void @llvm.donothing() nounwind readnone
-
- -
Overview:
-

The llvm.donothing intrinsic doesn't perform any operation. It's the -only intrinsic that can be called with an invoke instruction.

- -
Arguments:
-

None.

- -
Semantics:
-

This intrinsic does nothing, and it's removed by optimizers and ignored by -codegen.

-
- -
- -
- -
-
- Valid CSS - Valid HTML 4.01 - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-29 15:12:44 +0100 (Mon, 29 Oct 2012) $ -
- - - diff --git a/docs/LangRef.rst b/docs/LangRef.rst new file mode 100644 index 000000000000..659f02afb961 --- /dev/null +++ b/docs/LangRef.rst @@ -0,0 +1,8605 @@ +============================== +LLVM Language Reference Manual +============================== + +.. contents:: + :local: + :depth: 3 + +Abstract +======== + +This document is a reference manual for the LLVM assembly language. LLVM +is a Static Single Assignment (SSA) based representation that provides +type safety, low-level operations, flexibility, and the capability of +representing 'all' high-level languages cleanly. It is the common code +representation used throughout all phases of the LLVM compilation +strategy. + +Introduction +============ + +The LLVM code representation is designed to be used in three different +forms: as an in-memory compiler IR, as an on-disk bitcode representation +(suitable for fast loading by a Just-In-Time compiler), and as a human +readable assembly language representation. This allows LLVM to provide a +powerful intermediate representation for efficient compiler +transformations and analysis, while providing a natural means to debug +and visualize the transformations. The three different forms of LLVM are +all equivalent. This document describes the human readable +representation and notation. + +The LLVM representation aims to be light-weight and low-level while +being expressive, typed, and extensible at the same time. It aims to be +a "universal IR" of sorts, by being at a low enough level that +high-level ideas may be cleanly mapped to it (similar to how +microprocessors are "universal IR's", allowing many source languages to +be mapped to them). By providing type information, LLVM can be used as +the target of optimizations: for example, through pointer analysis, it +can be proven that a C automatic variable is never accessed outside of +the current function, allowing it to be promoted to a simple SSA value +instead of a memory location. + +.. 
_wellformed: + +Well-Formedness +--------------- + +It is important to note that this document describes 'well formed' LLVM +assembly language. There is a difference between what the parser accepts +and what is considered 'well formed'. For example, the following +instruction is syntactically okay, but not well formed: + +.. code-block:: llvm + + %x = add i32 1, %x + +because the definition of ``%x`` does not dominate all of its uses. The +LLVM infrastructure provides a verification pass that may be used to +verify that an LLVM module is well formed. This pass is automatically +run by the parser after parsing input assembly and by the optimizer +before it outputs bitcode. The violations pointed out by the verifier +pass indicate bugs in transformation passes or input to the parser. + +.. _identifiers: + +Identifiers +=========== + +LLVM identifiers come in two basic types: global and local. Global +identifiers (functions, global variables) begin with the ``'@'`` +character. Local identifiers (register names, types) begin with the +``'%'`` character. Additionally, there are three different formats for +identifiers, for different purposes: + +#. Named values are represented as a string of characters with their + prefix. For example, ``%foo``, ``@DivisionByZero``, + ``%a.really.long.identifier``. The actual regular expression used is + '``[%@][a-zA-Z$._][a-zA-Z$._0-9]*``'. Identifiers which require other + characters in their names can be surrounded with quotes. Special + characters may be escaped using ``"\xx"`` where ``xx`` is the ASCII + code for the character in hexadecimal. In this way, any character can + be used in a name value, even quotes themselves. +#. Unnamed values are represented as an unsigned numeric value with + their prefix. For example, ``%12``, ``@2``, ``%44``. +#. Constants, which are described in the section Constants_ below. 
+ +LLVM requires that values start with a prefix for two reasons: Compilers +don't need to worry about name clashes with reserved words, and the set +of reserved words may be expanded in the future without penalty. +Additionally, unnamed identifiers allow a compiler to quickly come up +with a temporary variable without having to avoid symbol table +conflicts. + +Reserved words in LLVM are very similar to reserved words in other +languages. There are keywords for different opcodes ('``add``', +'``bitcast``', '``ret``', etc...), for primitive type names ('``void``', +'``i32``', etc...), and others. These reserved words cannot conflict +with variable names, because none of them start with a prefix character +(``'%'`` or ``'@'``). + +Here is an example of LLVM code to multiply the integer variable +'``%X``' by 8: + +The easy way: + +.. code-block:: llvm + + %result = mul i32 %X, 8 + +After strength reduction: + +.. code-block:: llvm + + %result = shl i32 %X, 3 + +And the hard way: + +.. code-block:: llvm + + %0 = add i32 %X, %X ; yields {i32}:%0 + %1 = add i32 %0, %0 ; yields {i32}:%1 + %result = add i32 %1, %1 + +This last way of multiplying ``%X`` by 8 illustrates several important +lexical features of LLVM: + +#. Comments are delimited with a '``;``' and go until the end of line. +#. Unnamed temporaries are created when the result of a computation is + not assigned to a named value. +#. Unnamed temporaries are numbered sequentially + +It also shows a convention that we follow in this document. When +demonstrating instructions, we will follow an instruction with a comment +that defines the type and name of value produced. + +High Level Structure +==================== + +Module Structure +---------------- + +LLVM programs are composed of ``Module``'s, each of which is a +translation unit of the input programs. Each module consists of +functions, global variables, and symbol table entries. 
Modules may be +combined together with the LLVM linker, which merges function (and +global variable) definitions, resolves forward declarations, and merges +symbol table entries. Here is an example of the "hello world" module: + +.. code-block:: llvm + + ; Declare the string constant as a global constant. + @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" + + ; External declaration of the puts function + declare i32 @puts(i8* nocapture) nounwind + + ; Definition of main function + define i32 @main() { ; i32()* + ; Convert [13 x i8]* to i8 *... + %cast210 = getelementptr [13 x i8]* @.str, i64 0, i64 0 + + ; Call puts function to write out the string to stdout. + call i32 @puts(i8* %cast210) + ret i32 0 + } + + ; Named metadata + !1 = metadata !{i32 42} + !foo = !{!1, null} + +This example is made up of a :ref:`global variable ` named +"``.str``", an external declaration of the "``puts``" function, a +:ref:`function definition ` for "``main``" and +:ref:`named metadata ` "``foo``". + +In general, a module is made up of a list of global values (where both +functions and global variables are global values). Global values are +represented by a pointer to a memory location (in this case, a pointer +to an array of char, and a pointer to a function), and have one of the +following :ref:`linkage types `. + +.. _linkage: + +Linkage Types +------------- + +All Global Variables and Functions have one of the following types of +linkage: + +``private`` + Global values with "``private``" linkage are only directly + accessible by objects in the current module. In particular, linking + code into a module with an private global value may cause the + private to be renamed as necessary to avoid collisions. Because the + symbol is private to the module, all references can be updated. This + doesn't show up in any symbol table in the object file. 
+``linker_private`` + Similar to ``private``, but the symbol is passed through the + assembler and evaluated by the linker. Unlike normal strong symbols, + they are removed by the linker from the final linked image + (executable or dynamic library). +``linker_private_weak`` + Similar to "``linker_private``", but the symbol is weak. Note that + ``linker_private_weak`` symbols are subject to coalescing by the + linker. The symbols are removed by the linker from the final linked + image (executable or dynamic library). +``internal`` + Similar to private, but the value shows as a local symbol + (``STB_LOCAL`` in the case of ELF) in the object file. This + corresponds to the notion of the '``static``' keyword in C. +``available_externally`` + Globals with "``available_externally``" linkage are never emitted + into the object file corresponding to the LLVM module. They exist to + allow inlining and other optimizations to take place given knowledge + of the definition of the global, which is known to be somewhere + outside the module. Globals with ``available_externally`` linkage + are allowed to be discarded at will, and are otherwise the same as + ``linkonce_odr``. This linkage type is only allowed on definitions, + not declarations. +``linkonce`` + Globals with "``linkonce``" linkage are merged with other globals of + the same name when linkage occurs. This can be used to implement + some forms of inline functions, templates, or other code which must + be generated in each translation unit that uses it, but where the + body may be overridden with a more definitive definition later. + Unreferenced ``linkonce`` globals are allowed to be discarded. Note + that ``linkonce`` linkage does not actually allow the optimizer to + inline the body of this function into callers because it doesn't + know if this definition of the function is the definitive definition + within the program or whether it will be overridden by a stronger + definition. 
To enable inlining and other optimizations, use + "``linkonce_odr``" linkage. +``weak`` + "``weak``" linkage has the same merging semantics as ``linkonce`` + linkage, except that unreferenced globals with ``weak`` linkage may + not be discarded. This is used for globals that are declared "weak" + in C source code. +``common`` + "``common``" linkage is most similar to "``weak``" linkage, but they + are used for tentative definitions in C, such as "``int X;``" at + global scope. Symbols with "``common``" linkage are merged in the + same way as ``weak symbols``, and they may not be deleted if + unreferenced. ``common`` symbols may not have an explicit section, + must have a zero initializer, and may not be marked + ':ref:`constant `'. Functions and aliases may not have + common linkage. + +.. _linkage_appending: + +``appending`` + "``appending``" linkage may only be applied to global variables of + pointer to array type. When two global variables with appending + linkage are linked together, the two global arrays are appended + together. This is the LLVM, typesafe, equivalent of having the + system linker append together "sections" with identical names when + .o files are linked. +``extern_weak`` + The semantics of this linkage follow the ELF object file model: the + symbol is weak until linked, if not linked, the symbol becomes null + instead of being an undefined reference. +``linkonce_odr``, ``weak_odr`` + Some languages allow differing globals to be merged, such as two + functions with different semantics. Other languages, such as + ``C++``, ensure that only equivalent globals are ever merged (the + "one definition rule" --- "ODR"). Such languages can use the + ``linkonce_odr`` and ``weak_odr`` linkage types to indicate that the + global will only be merged with equivalent globals. These linkage + types are otherwise the same as their non-``odr`` versions. 
+``linkonce_odr_auto_hide`` + Similar to "``linkonce_odr``", but nothing in the translation unit + takes the address of this definition. For instance, functions that + had an inline definition, but the compiler decided not to inline it. + ``linkonce_odr_auto_hide`` may have only ``default`` visibility. The + symbols are removed by the linker from the final linked image + (executable or dynamic library). +``external`` + If none of the above identifiers are used, the global is externally + visible, meaning that it participates in linkage and can be used to + resolve external symbol references. + +The next two types of linkage are targeted for Microsoft Windows +platform only. They are designed to support importing (exporting) +symbols from (to) DLLs (Dynamic Link Libraries). + +``dllimport`` + "``dllimport``" linkage causes the compiler to reference a function + or variable via a global pointer to a pointer that is set up by the + DLL exporting the symbol. On Microsoft Windows targets, the pointer + name is formed by combining ``__imp_`` and the function or variable + name. +``dllexport`` + "``dllexport``" linkage causes the compiler to provide a global + pointer to a pointer in a DLL, so that it can be referenced with the + ``dllimport`` attribute. On Microsoft Windows targets, the pointer + name is formed by combining ``__imp_`` and the function or variable + name. + +For example, since the "``.LC0``" variable is defined to be internal, if +another module defined a "``.LC0``" variable and was linked with this +one, one of the two would be renamed, preventing a collision. Since +"``main``" and "``puts``" are external (i.e., lacking any linkage +declarations), they are accessible outside of the current module. + +It is illegal for a function *declaration* to have any linkage type +other than ``external``, ``dllimport`` or ``extern_weak``. + +Aliases can have only ``external``, ``internal``, ``weak`` or +``weak_odr`` linkages. + +.. 
_callingconv: + +Calling Conventions +------------------- + +LLVM :ref:`functions `, :ref:`calls ` and +:ref:`invokes ` can all have an optional calling convention +specified for the call. The calling convention of any pair of dynamic +caller/callee must match, or the behavior of the program is undefined. +The following calling conventions are supported by LLVM, and more may be +added in the future: + +"``ccc``" - The C calling convention + This calling convention (the default if no other calling convention + is specified) matches the target C calling conventions. This calling + convention supports varargs function calls and tolerates some + mismatch in the declared prototype and implemented declaration of + the function (as does normal C). +"``fastcc``" - The fast calling convention + This calling convention attempts to make calls as fast as possible + (e.g. by passing things in registers). This calling convention + allows the target to use whatever tricks it wants to produce fast + code for the target, without having to conform to an externally + specified ABI (Application Binary Interface). `Tail calls can only + be optimized when this, the GHC or the HiPE convention is + used. `_ This calling convention does not + support varargs and requires the prototype of all callees to exactly + match the prototype of the function definition. +"``coldcc``" - The cold calling convention + This calling convention attempts to make code in the caller as + efficient as possible under the assumption that the call is not + commonly executed. As such, these calls often preserve all registers + so that the call does not break any live ranges in the caller side. + This calling convention does not support varargs and requires the + prototype of all callees to exactly match the prototype of the + function definition. +"``cc 10``" - GHC convention + This calling convention has been implemented specifically for use by + the `Glasgow Haskell Compiler (GHC) `_. 
+ It passes everything in registers, going to extremes to achieve this + by disabling callee save registers. This calling convention should + not be used lightly but only for specific situations such as an + alternative to the *register pinning* performance technique often + used when implementing functional programming languages. At the + moment only X86 supports this convention and it has the following + limitations: + + - On *X86-32* only supports up to 4 bit type parameters. No + floating point types are supported. + - On *X86-64* only supports up to 10 bit type parameters and 6 + floating point parameters. + + This calling convention supports `tail call + optimization `_ but requires both the + caller and callee are using it. +"``cc 11``" - The HiPE calling convention + This calling convention has been implemented specifically for use by + the `High-Performance Erlang + (HiPE) `_ compiler, *the* + native code compiler of the `Ericsson's Open Source Erlang/OTP + system `_. It uses more + registers for argument passing than the ordinary C calling + convention and defines no callee-saved registers. The calling + convention properly supports `tail call + optimization `_ but requires that both the + caller and the callee use it. It uses a *register pinning* + mechanism, similar to GHC's convention, for keeping frequently + accessed runtime components pinned to specific hardware registers. + At the moment only X86 supports this convention (both 32 and 64 + bit). +"``cc ``" - Numbered convention + Any calling convention may be specified by number, allowing + target-specific calling conventions to be used. Target specific + calling conventions start at 64. + +More calling conventions can be added/defined on an as-needed basis, to +support Pascal conventions or any other well-known target-independent +convention. 
+ +Visibility Styles +----------------- + +All Global Variables and Functions have one of the following visibility +styles: + +"``default``" - Default style + On targets that use the ELF object file format, default visibility + means that the declaration is visible to other modules and, in + shared libraries, means that the declared entity may be overridden. + On Darwin, default visibility means that the declaration is visible + to other modules. Default visibility corresponds to "external + linkage" in the language. +"``hidden``" - Hidden style + Two declarations of an object with hidden visibility refer to the + same object if they are in the same shared object. Usually, hidden + visibility indicates that the symbol will not be placed into the + dynamic symbol table, so no other module (executable or shared + library) can reference it directly. +"``protected``" - Protected style + On ELF, protected visibility indicates that the symbol will be + placed in the dynamic symbol table, but that references within the + defining module will bind to the local symbol. That is, the symbol + cannot be overridden by another module. + +Named Types +----------- + +LLVM IR allows you to specify name aliases for certain types. This can +make it easier to read the IR and make the IR more condensed +(particularly when recursive types are involved). An example of a name +specification is: + +.. code-block:: llvm + + %mytype = type { %mytype*, i32 } + +You may give a name to any :ref:`type ` except +":ref:`void `". Type name aliases may be used anywhere a type is +expected with the syntax "%mytype". + +Note that type names are aliases for the structural type that they +indicate, and that you can therefore specify multiple names for the same +type. This often leads to confusing behavior when dumping out a .ll +file. Since LLVM IR uses structural typing, the name is not part of the +type. 
When printing out LLVM IR, the printer will pick *one name* to +render all types of a particular shape. This means that if you have code +where two different source types end up having the same LLVM type, +the dumper will sometimes print the "wrong" or unexpected type. This is +an important design point and isn't going to change. + +.. _globalvars: + +Global Variables +---------------- + +Global variables define regions of memory allocated at compilation time +instead of run-time. Global variables may optionally be initialized, may +have an explicit section to be placed in, and may have an optional +explicit alignment specified. + +A variable may be defined as ``thread_local``, which means that it will +not be shared by threads (each thread will have a separate copy of the +variable). Not all targets support thread-local variables. Optionally, a +TLS model may be specified: + +``localdynamic`` + For variables that are only used within the current shared library. +``initialexec`` + For variables in modules that will not be loaded dynamically. +``localexec`` + For variables defined in the executable and only used within it. + +The models correspond to the ELF TLS models; see `ELF Handling For +Thread-Local Storage `_ for +more information on the circumstances under which the different models may +be used. The target may choose a different TLS model if the specified +model is not supported, or if a better choice of model can be made. + +A variable may be defined as a global ``constant``, which indicates that +the contents of the variable will **never** be modified (enabling better +optimization, allowing the global data to be placed in the read-only +section of an executable, etc). Note that variables that need runtime +initialization cannot be marked ``constant`` as there is a store to the +variable. + +LLVM explicitly allows *declarations* of global variables to be marked +constant, even if the final definition of the global is not.
This +capability can be used to enable slightly better optimization of the +program, but requires the language definition to guarantee that +optimizations based on the 'constantness' are valid for the translation +units that do not include the definition. + +As SSA values, global variables define pointer values that are in scope +(i.e. they dominate) all basic blocks in the program. Global variables +always define a pointer to their "content" type because they describe a +region of memory, and all memory objects in LLVM are accessed through +pointers. + +Global variables can be marked with ``unnamed_addr`` which indicates +that the address is not significant, only the content. Constants marked +like this can be merged with other constants if they have the same +initializer. Note that a constant with a significant address *can* be +merged with an ``unnamed_addr`` constant, the result being a constant +whose address is significant. + +A global variable may be declared to reside in a target-specific +numbered address space. For targets that support them, address spaces +may affect how optimizations are performed and/or what target +instructions are used to access the variable. The default address space +is zero. The address space qualifier must precede any other attributes. + +LLVM allows an explicit section to be specified for globals. If the +target supports it, it will emit globals to the section specified. + +By default, global initializers are optimized by assuming that global +variables defined within the module are not modified from their +initial values before the start of the global initializer. This is +true even for variables potentially accessible from outside the +module, including those with external linkage or appearing in +``@llvm.used``. This assumption may be suppressed by marking the +variable with ``externally_initialized``. + +An explicit alignment may be specified for a global, which must be a +power of 2.
If not present, or if the alignment is set to zero, the +alignment of the global is set by the target to whatever it feels +convenient. If an explicit alignment is specified, the global is forced +to have exactly that alignment. Targets and optimizers are not allowed +to over-align the global if the global has an assigned section. In this +case, the extra alignment could be observable: for example, code could +assume that the globals are densely packed in their section and try to +iterate over them as an array, alignment padding would break this +iteration. + +For example, the following defines a global in a numbered address space +with an initializer, section, and alignment: + +.. code-block:: llvm + + @G = addrspace(5) constant float 1.0, section "foo", align 4 + +The following example defines a thread-local global with the +``initialexec`` TLS model: + +.. code-block:: llvm + + @G = thread_local(initialexec) global i32 0, align 4 + +.. _functionstructure: + +Functions +--------- + +LLVM function definitions consist of the "``define``" keyword, an +optional :ref:`linkage type `, an optional :ref:`visibility +style `, an optional :ref:`calling convention `, +an optional ``unnamed_addr`` attribute, a return type, an optional +:ref:`parameter attribute ` for the return type, a function +name, a (possibly empty) argument list (each with optional :ref:`parameter +attributes `), optional :ref:`function attributes `, +an optional section, an optional alignment, an optional :ref:`garbage +collector name `, an opening curly brace, a list of basic blocks, +and a closing curly brace. 
+ +LLVM function declarations consist of the "``declare``" keyword, an +optional :ref:`linkage type `, an optional :ref:`visibility +style `, an optional :ref:`calling convention `, +an optional ``unnamed_addr`` attribute, a return type, an optional +:ref:`parameter attribute ` for the return type, a function +name, a possibly empty list of arguments, an optional alignment, and an +optional :ref:`garbage collector name `. + +A function definition contains a list of basic blocks, forming the CFG +(Control Flow Graph) for the function. Each basic block may optionally +start with a label (giving the basic block a symbol table entry), +contains a list of instructions, and ends with a +:ref:`terminator ` instruction (such as a branch or function +return). + +The first basic block in a function is special in two ways: it is +immediately executed on entrance to the function, and it is not allowed +to have predecessor basic blocks (i.e. there can not be any branches to +the entry block of a function). Because the block can have no +predecessors, it also cannot have any :ref:`PHI nodes `. + +LLVM allows an explicit section to be specified for functions. If the +target supports it, it will emit functions to the section specified. + +An explicit alignment may be specified for a function. If not present, +or if the alignment is set to zero, the alignment of the function is set +by the target to whatever it feels convenient. If an explicit alignment +is specified, the function is forced to have at least that much +alignment. All alignments must be a power of 2. + +If the ``unnamed_addr`` attribute is given, the address is known not to +be significant and two identical functions can be merged. + +Syntax:: + + define [linkage] [visibility] + [cconv] [ret attrs] + <ResultType> @<FunctionName> ([argument list]) + [fn Attrs] [section "name"] [align N] + [gc] { ...
} + +Aliases +------- + +Aliases act as "second name" for the aliasee value (which can be either +function, global variable, another alias or bitcast of global value). +Aliases may have an optional :ref:`linkage type `, and an optional +:ref:`visibility style `. + +Syntax:: + + @ = alias [Linkage] [Visibility] @ + +.. _namedmetadatastructure: + +Named Metadata +-------------- + +Named metadata is a collection of metadata. :ref:`Metadata +nodes ` (but not metadata strings) are the only valid +operands for a named metadata. + +Syntax:: + + ; Some unnamed metadata nodes, which are referenced by the named metadata. + !0 = metadata !{metadata !"zero"} + !1 = metadata !{metadata !"one"} + !2 = metadata !{metadata !"two"} + ; A named metadata. + !name = !{!0, !1, !2} + +.. _paramattrs: + +Parameter Attributes +-------------------- + +The return type and each parameter of a function type may have a set of +*parameter attributes* associated with them. Parameter attributes are +used to communicate additional information about the result or +parameters of a function. Parameter attributes are considered to be part +of the function, not of the function type, so functions with different +parameter attributes can have the same function type. + +Parameter attributes are simple keywords that follow the type specified. +If multiple parameter attributes are needed, they are space separated. +For example: + +.. code-block:: llvm + + declare i32 @printf(i8* noalias nocapture, ...) + declare i32 @atoi(i8 zeroext) + declare signext i8 @returns_signed_char() + +Note that any attributes for the function result (``nounwind``, +``readonly``) come immediately after the argument list. 
+ +Currently, only the following parameter attributes are defined: + +``zeroext`` + This indicates to the code generator that the parameter or return + value should be zero-extended to the extent required by the target's + ABI (which is usually 32-bits, but is 8-bits for an i1 on x86-64) by + the caller (for a parameter) or the callee (for a return value). +``signext`` + This indicates to the code generator that the parameter or return + value should be sign-extended to the extent required by the target's + ABI (which is usually 32-bits) by the caller (for a parameter) or + the callee (for a return value). +``inreg`` + This indicates that this parameter or return value should be treated + in a special target-dependent fashion while emitting code for + a function call or return (usually, by putting it in a register as + opposed to memory, though some targets use it to distinguish between + two different kinds of registers). Use of this attribute is + target-specific. +``byval`` + This indicates that the pointer parameter should really be passed by + value to the function. The attribute implies that a hidden copy of + the pointee is made between the caller and the callee, so the callee + is unable to modify the value in the caller. This attribute is only + valid on LLVM pointer arguments. It is generally used to pass + structs and arrays by value, but is also valid on pointers to + scalars. The copy is considered to belong to the caller, not the + callee (for example, ``readonly`` functions should not write to + ``byval`` parameters). This is not a valid attribute for return + values. + + The byval attribute also supports specifying an alignment with the + align attribute. It indicates the alignment of the stack slot to + form and the known alignment of the pointer specified to the call + site. If the alignment is not specified, then the code generator + makes a target-specific assumption.
+ +``sret`` + This indicates that the pointer parameter specifies the address of a + structure that is the return value of the function in the source + program. This pointer must be guaranteed by the caller to be valid: + loads and stores to the structure may be assumed by the callee + not to trap and to be properly aligned. This may only be applied to + the first parameter. This is not a valid attribute for return + values. +``noalias`` + This indicates that pointer values `*based* ` on + the argument or return value do not alias pointer values which are + not *based* on it, ignoring certain "irrelevant" dependencies. For a + call to the parent function, dependencies between memory references + from before or after the call and from those during the call are + "irrelevant" to the ``noalias`` keyword for the arguments and return + value used in that call. The caller shares the responsibility with + the callee for ensuring that these requirements are met. For further + details, please see the discussion of the NoAlias response in `alias + analysis `_. + + Note that this definition of ``noalias`` is intentionally similar + to the definition of ``restrict`` in C99 for function arguments, + though it is slightly weaker. + + For function return values, C99's ``restrict`` is not meaningful, + while LLVM's ``noalias`` is. +``nocapture`` + This indicates that the callee does not make any copies of the + pointer that outlive the callee itself. This is not a valid + attribute for return values. + +.. _nest: + +``nest`` + This indicates that the pointer parameter can be excised using the + :ref:`trampoline intrinsics `. This is not a valid + attribute for return values. +``nobuiltin`` + This indicates that the callee function at a call site is not + recognized as a built-in function. LLVM will retain the original call + and not replace it with equivalent code based on the semantics of the + built-in function. + +.. 
_gc: + +Garbage Collector Names +----------------------- + +Each function may specify a garbage collector name, which is simply a +string: + +.. code-block:: llvm + + define void @f() gc "name" { ... } + +The compiler declares the supported values of *name*. Specifying a +collector will cause the compiler to alter its output in order to +support the named garbage collection algorithm. + +.. _attrgrp: + +Attribute Groups +---------------- + +Attribute groups are groups of attributes that are referenced by objects within +the IR. They are important for keeping ``.ll`` files readable, because a lot of +functions will use the same set of attributes. In the degenerate case of a +``.ll`` file that corresponds to a single ``.c`` file, the single attribute +group will capture the important command line flags used to build that file. + +An attribute group is a module-level object. To use an attribute group, an +object references the attribute group's ID (e.g. ``#37``). An object may refer +to more than one attribute group. In that situation, the attributes from the +different groups are merged. + +Here is an example of attribute groups for a function that should always be +inlined, has a stack alignment of 4, and which shouldn't use SSE instructions: + +.. code-block:: llvm + + ; Target-independent attributes: + #0 = attributes { alwaysinline alignstack=4 } + + ; Target-dependent attributes: + #1 = attributes { "no-sse" } + + ; Function @f has attributes: alwaysinline, alignstack=4, and "no-sse". + define void @f() #0 #1 { ... } + +.. _fnattrs: + +Function Attributes +------------------- + +Function attributes are set to communicate additional information about +a function. Function attributes are considered to be part of the +function, not of the function type, so functions with different function +attributes can have the same function type. + +Function attributes are simple keywords that follow the type specified.
+If multiple attributes are needed, they are space separated. For +example: + +.. code-block:: llvm + + define void @f() noinline { ... } + define void @f() alwaysinline { ... } + define void @f() alwaysinline optsize { ... } + define void @f() optsize { ... } + +``alignstack(<n>)`` + This attribute indicates that, when emitting the prologue and + epilogue, the backend should forcibly align the stack pointer. + Specify the desired alignment, which must be a power of two, in + parentheses. +``alwaysinline`` + This attribute indicates that the inliner should attempt to inline + this function into callers whenever possible, ignoring any active + inlining size threshold for this caller. +``nonlazybind`` + This attribute suppresses lazy symbol binding for the function. This + may make calls to the function faster, at the cost of extra program + startup time if the function is not called during program startup. +``inlinehint`` + This attribute indicates that the source code contained a hint that + inlining this function is desirable (such as the "inline" keyword in + C/C++). It is just a hint; it imposes no requirements on the + inliner. +``naked`` + This attribute disables prologue / epilogue emission for the + function. This can have very system-specific consequences. +``noduplicate`` + This attribute indicates that calls to the function cannot be + duplicated. A call to a ``noduplicate`` function may be moved + within its parent function, but may not be duplicated within + its parent function. + + A function containing a ``noduplicate`` call may still + be an inlining candidate, provided that the call is not + duplicated by inlining. That implies that the function has + internal linkage and only has one call site, so the original + call is dead after inlining. +``noimplicitfloat`` + This attribute disables implicit floating point instructions. +``noinline`` + This attribute indicates that the inliner should never inline this + function in any situation.
This attribute may not be used together + with the ``alwaysinline`` attribute. +``noredzone`` + This attribute indicates that the code generator should not use a + red zone, even if the target-specific ABI normally permits it. +``noreturn`` + This function attribute indicates that the function never returns + normally. This produces undefined behavior at runtime if the + function ever does dynamically return. +``nounwind`` + This function attribute indicates that the function never returns + with an unwind or exceptional control flow. If the function does + unwind, its runtime behavior is undefined. +``optsize`` + This attribute suggests that optimization passes and code generator + passes make choices that keep the code size of this function low, + and otherwise do optimizations specifically to reduce code size. +``readnone`` + This attribute indicates that the function computes its result (or + decides to unwind an exception) based strictly on its arguments, + without dereferencing any pointer arguments or otherwise accessing + any mutable state (e.g. memory, control registers, etc) visible to + caller functions. It does not write through any pointer arguments + (including ``byval`` arguments) and never changes any state visible + to callers. This means that it cannot unwind exceptions by calling + the ``C++`` exception throwing methods. +``readonly`` + This attribute indicates that the function does not write through + any pointer arguments (including ``byval`` arguments) or otherwise + modify any state (e.g. memory, control registers, etc) visible to + caller functions. It may dereference pointer arguments and read + state that may be set in the caller. A readonly function always + returns the same value (or unwinds an exception identically) when + called with the same set of arguments and global state. It cannot + unwind an exception by calling the ``C++`` exception throwing + methods. 
+``returns_twice`` + This attribute indicates that this function can return twice. The C + ``setjmp`` is an example of such a function. The compiler disables + some optimizations (like tail calls) in the caller of these + functions. +``sanitize_address`` + This attribute indicates that AddressSanitizer checks + (dynamic address safety analysis) are enabled for this function. +``sanitize_memory`` + This attribute indicates that MemorySanitizer checks (dynamic detection + of accesses to uninitialized memory) are enabled for this function. +``sanitize_thread`` + This attribute indicates that ThreadSanitizer checks + (dynamic thread safety analysis) are enabled for this function. +``ssp`` + This attribute indicates that the function should emit a stack + smashing protector. It is in the form of a "canary" --- a random value + placed on the stack before the local variables that's checked upon + return from the function to see if it has been overwritten. A + heuristic is used to determine if a function needs stack protectors + or not. The heuristic used will enable protectors for functions with: + + - Character arrays larger than ``ssp-buffer-size`` (default 8). + - Aggregates containing character arrays larger than ``ssp-buffer-size``. + - Calls to alloca() with variable sizes or constant sizes greater than + ``ssp-buffer-size``. + + If a function that has an ``ssp`` attribute is inlined into a + function that doesn't have an ``ssp`` attribute, then the resulting + function will have an ``ssp`` attribute. +``sspreq`` + This attribute indicates that the function should *always* emit a + stack smashing protector. This overrides the ``ssp`` function + attribute. + + If a function that has an ``sspreq`` attribute is inlined into a + function that doesn't have an ``sspreq`` attribute or which has an + ``ssp`` or ``sspstrong`` attribute, then the resulting function will have + an ``sspreq`` attribute. 
+``sspstrong`` + This attribute indicates that the function should emit a stack smashing + protector. This attribute causes a strong heuristic to be used when + determining if a function needs stack protectors. The strong heuristic + will enable protectors for functions with: + + - Arrays of any size and type. + - Aggregates containing an array of any size and type. + - Calls to alloca(). + - Local variables that have had their address taken. + + This overrides the ``ssp`` function attribute. + + If a function that has an ``sspstrong`` attribute is inlined into a + function that doesn't have an ``sspstrong`` attribute, then the + resulting function will have an ``sspstrong`` attribute. +``uwtable`` + This attribute indicates that the ABI being targeted requires that + an unwind table entry be produced for this function even if we can + show that no exceptions pass by it. This is normally the case for + the ELF x86-64 ABI, but it can be disabled for some compilation + units. + +.. _moduleasm: + +Module-Level Inline Assembly +---------------------------- + +Modules may contain "module-level inline asm" blocks, which correspond +to the GCC "file scope inline asm" blocks. These blocks are internally +concatenated by LLVM and treated as a single unit, but may be separated +in the ``.ll`` file if desired. The syntax is very simple: + +.. code-block:: llvm + + module asm "inline asm code goes here" + module asm "more can go here" + +The strings can contain any character by escaping non-printable +characters. The escape sequence used is simply "\\xx" where "xx" is the +two digit hex code for the number. + +The inline asm code is simply printed to the machine code .s file when +assembly code is generated. + +Data Layout +----------- + +A module may specify a target specific data layout string that specifies +how data is to be laid out in memory. The syntax for the data layout is +simply: + +..
code-block:: llvm + + target datalayout = "layout specification" + +The *layout specification* consists of a list of specifications +separated by the minus sign character ('-'). Each specification starts +with a letter and may include other information after the letter to +define some aspect of the data layout. The specifications accepted are +as follows: + +``E`` + Specifies that the target lays out data in big-endian form. That is, + the bits with the most significance have the lowest address + location. +``e`` + Specifies that the target lays out data in little-endian form. That + is, the bits with the least significance have the lowest address + location. +``S<size>`` + Specifies the natural alignment of the stack in bits. Alignment + promotion of stack variables is limited to the natural stack + alignment to avoid dynamic stack realignment. The stack alignment + must be a multiple of 8-bits. If omitted, the natural stack + alignment defaults to "unspecified", which does not prevent any + alignment promotions. +``p[n]:<size>:<abi>:<pref>`` + This specifies the *size* of a pointer and its ``<abi>`` and + ``<pref>``\erred alignments for address space ``n``. All sizes are in + bits. Specifying the ``<pref>`` alignment is optional. If omitted, the + preceding ``:`` should be omitted too. The address space, ``n`` is + optional, and if not specified, denotes the default address space 0. + The value of ``n`` must be in the range [1,2^23). +``i<size>:<abi>:<pref>`` + This specifies the alignment for an integer type of a given bit + ``<size>``. The value of ``<size>`` must be in the range [1,2^23). +``v<size>:<abi>:<pref>`` + This specifies the alignment for a vector type of a given bit + ``<size>``. +``f<size>:<abi>:<pref>`` + This specifies the alignment for a floating point type of a given bit + ``<size>``. Only values of ``<size>`` that are supported by the target + will work. 32 (float) and 64 (double) are supported on all targets; 80 + or 128 (different flavors of long double) are also supported on some + targets.
+``a<size>:<abi>:<pref>`` + This specifies the alignment for an aggregate type of a given bit + ``<size>``. +``s<size>:<abi>:<pref>`` + This specifies the alignment for a stack object of a given bit + ``<size>``. +``n<size1>:<size2>:<size3>...`` + This specifies a set of native integer widths for the target CPU in + bits. For example, it might contain ``n32`` for 32-bit PowerPC, + ``n32:64`` for PowerPC 64, or ``n8:16:32:64`` for X86-64. Elements of + this set are considered to support most general arithmetic operations + efficiently. + +When constructing the data layout for a given target, LLVM starts with a +default set of specifications which are then (possibly) overridden by +the specifications in the ``datalayout`` keyword. The default +specifications are given in this list: + +- ``E`` - big endian +- ``p:64:64:64`` - 64-bit pointers with 64-bit alignment +- ``S0`` - natural stack alignment is unspecified +- ``i1:8:8`` - i1 is 8-bit (byte) aligned +- ``i8:8:8`` - i8 is 8-bit (byte) aligned +- ``i16:16:16`` - i16 is 16-bit aligned +- ``i32:32:32`` - i32 is 32-bit aligned +- ``i64:32:64`` - i64 has ABI alignment of 32-bits but preferred + alignment of 64-bits +- ``f16:16:16`` - half is 16-bit aligned +- ``f32:32:32`` - float is 32-bit aligned +- ``f64:64:64`` - double is 64-bit aligned +- ``f128:128:128`` - quad is 128-bit aligned +- ``v64:64:64`` - 64-bit vector is 64-bit aligned +- ``v128:128:128`` - 128-bit vector is 128-bit aligned +- ``a0:0:64`` - aggregates are 64-bit aligned + +When LLVM is determining the alignment for a given type, it uses the +following rules: + +#. If the type sought is an exact match for one of the specifications, + that specification is used. +#. If no match is found, and the type sought is an integer type, then + the smallest integer type that is larger than the bitwidth of the + sought type is used. If none of the specifications are larger than + the bitwidth then the largest integer type is used.
For example, + given the default specifications above, the i7 type will use the + alignment of i8 (next largest) while both i65 and i256 will use the + alignment of i64 (largest specified). +#. If no match is found, and the type sought is a vector type, then the + largest vector type that is smaller than the sought vector type will + be used as a fall back. This happens because <128 x double> can be + implemented in terms of 64 <2 x double>, for example. + +The function of the data layout string may not be what you expect. +Notably, this is not a specification from the frontend of what alignment +the code generator should use. + +Instead, if specified, the target data layout is required to match what +the ultimate *code generator* expects. This string is used by the +mid-level optimizers to improve code, and this only works if it matches +what the ultimate code generator uses. If you would like to generate IR +that does not embed this target-specific detail into the IR, then you +don't have to specify the string. This will disable some optimizations +that require precise layout information, but this also prevents those +optimizations from introducing target specificity into the IR. + +.. _pointeraliasing: + +Pointer Aliasing Rules +---------------------- + +Any memory access must be done through a pointer value associated with +an address range of the memory access, otherwise the behavior is +undefined. Pointer values are associated with address ranges according +to the following rules: + +- A pointer value is associated with the addresses associated with any + value it is *based* on. +- An address of a global variable is associated with the address range + of the variable's storage. +- The result value of an allocation instruction is associated with the + address range of the allocated storage. +- A null pointer in the default address-space is associated with no + address. 
+- An integer constant other than zero or a pointer value returned from + a function not defined within LLVM may be associated with address + ranges allocated through mechanisms other than those provided by + LLVM. Such ranges shall not overlap with any ranges of addresses + allocated by mechanisms provided by LLVM. + +A pointer value is *based* on another pointer value according to the +following rules: + +- A pointer value formed from a ``getelementptr`` operation is *based* + on the first operand of the ``getelementptr``. +- The result value of a ``bitcast`` is *based* on the operand of the + ``bitcast``. +- A pointer value formed by an ``inttoptr`` is *based* on all pointer + values that contribute (directly or indirectly) to the computation of + the pointer's value. +- The "*based* on" relationship is transitive. + +Note that this definition of *"based"* is intentionally similar to the +definition of *"based"* in C99, though it is slightly weaker. + +LLVM IR does not associate types with memory. The result type of a +``load`` merely indicates the size and alignment of the memory from +which to load, as well as the interpretation of the value. The first +operand type of a ``store`` similarly only indicates the size and +alignment of the store. + +Consequently, type-based alias analysis, aka TBAA, aka +``-fstrict-aliasing``, is not applicable to general unadorned LLVM IR. +:ref:`Metadata ` may be used to encode additional information +which specialized optimization passes may use to implement type-based +alias analysis. + +.. _volatile: + +Volatile Memory Accesses +------------------------ + +Certain memory accesses, such as :ref:`load `'s, +:ref:`store `'s, and :ref:`llvm.memcpy `'s may be +marked ``volatile``. The optimizers must not change the number of +volatile operations or change their order of execution relative to other +volatile operations. The optimizers *may* change the order of volatile +operations relative to non-volatile operations. 
This is not Java's +"volatile" and has no cross-thread synchronization behavior. + +IR-level volatile loads and stores cannot safely be optimized into +llvm.memcpy or llvm.memmove intrinsics even when those intrinsics are +flagged volatile. Likewise, the backend should never split or merge +target-legal volatile load/store instructions. + +.. admonition:: Rationale + + Platforms may rely on volatile loads and stores of natively supported + data width to be executed as a single instruction. For example, in C + this holds for an l-value of volatile primitive type with native + hardware support, but not necessarily for aggregate types. The + frontend upholds these expectations, which are intentionally + unspecified in the IR. The rules above ensure that IR transformations + do not violate the frontend's contract with the language. + +.. _memmodel: + +Memory Model for Concurrent Operations +-------------------------------------- + +The LLVM IR does not define any way to start parallel threads of +execution or to register signal handlers. Nonetheless, there are +platform-specific ways to create them, and we define LLVM IR's behavior +in their presence. This model is inspired by the C++0x memory model. + +For a more informal introduction to this model, see the :doc:`Atomics`. + +We define a *happens-before* partial order as the least partial order +that + +- Is a superset of single-thread program order, and +- When ``a`` *synchronizes-with* ``b``, includes an edge from ``a`` to + ``b``. *Synchronizes-with* pairs are introduced by platform-specific + techniques, like pthread locks, thread creation, thread joining, + etc., and by atomic instructions. (See also :ref:`Atomic Memory Ordering + Constraints <ordering>`). + +Note that program order does not introduce *happens-before* edges +between a thread and signals executing inside that thread. + +Every (defined) read operation (load instructions, memcpy, atomic +loads/read-modify-writes, etc.) 
R reads a series of bytes written by +(defined) write operations (store instructions, atomic +stores/read-modify-writes, memcpy, etc.). For the purposes of this +section, initialized globals are considered to have a write of the +initializer which is atomic and happens before any other read or write +of the memory in question. For each byte of a read R, R\ :sub:`byte` +may see any write to the same byte, except: + +- If write\ :sub:`1` happens before write\ :sub:`2`, and + write\ :sub:`2` happens before R\ :sub:`byte`, then + R\ :sub:`byte` does not see write\ :sub:`1`. +- If R\ :sub:`byte` happens before write\ :sub:`3`, then + R\ :sub:`byte` does not see write\ :sub:`3`. + +Given that definition, R\ :sub:`byte` is defined as follows: + +- If R is volatile, the result is target-dependent. (Volatile is + supposed to give guarantees which can support ``sig_atomic_t`` in + C/C++, and may be used for accesses to addresses which do not behave + like normal memory. It does not generally provide cross-thread + synchronization.) +- Otherwise, if there is no write to the same byte that happens before + R\ :sub:`byte`, R\ :sub:`byte` returns ``undef`` for that byte. +- Otherwise, if R\ :sub:`byte` may see exactly one write, + R\ :sub:`byte` returns the value written by that write. +- Otherwise, if R is atomic, and all the writes R\ :sub:`byte` may + see are atomic, it chooses one of the values written. See the :ref:`Atomic + Memory Ordering Constraints ` section for additional + constraints on how the choice is made. +- Otherwise R\ :sub:`byte` returns ``undef``. + +R returns the value composed of the series of bytes it read. This +implies that some bytes within the value may be ``undef`` **without** +the entire value being ``undef``. Note that this only defines the +semantics of the operation; it doesn't mean that targets will emit more +than one instruction to read the series of bytes. 
+ +Note that in cases where none of the atomic intrinsics are used, this +model places only one restriction on IR transformations on top of what +is required for single-threaded execution: introducing a store to a byte +which might not otherwise be stored is not allowed in general. +(Specifically, in the case where another thread might write to and read +from an address, introducing a store can change a load that may see +exactly one write into a load that may see multiple writes.) + +.. _ordering: + +Atomic Memory Ordering Constraints +---------------------------------- + +Atomic instructions (:ref:`cmpxchg `, +:ref:`atomicrmw `, :ref:`fence `, +:ref:`atomic load `, and :ref:`atomic store `) take +an ordering parameter that determines which other atomic instructions on +the same address they *synchronize with*. These semantics are borrowed +from Java and C++0x, but are somewhat more colloquial. If these +descriptions aren't precise enough, check those specs (see spec +references in the :doc:`atomics guide `). +:ref:`fence ` instructions treat these orderings somewhat +differently since they don't take an address. See that instruction's +documentation for details. + +For a simpler introduction to the ordering constraints, see the +:doc:`Atomics`. + +``unordered`` + The set of values that can be read is governed by the happens-before + partial order. A value cannot be read unless some operation wrote + it. This is intended to provide a guarantee strong enough to model + Java's non-volatile shared variables. This ordering cannot be + specified for read-modify-write operations; it is not strong enough + to make them atomic in any interesting way. +``monotonic`` + In addition to the guarantees of ``unordered``, there is a single + total order for modifications by ``monotonic`` operations on each + address. All modification orders must be compatible with the + happens-before order. 
There is no guarantee that the modification + orders can be combined to a global total order for the whole program + (and this often will not be possible). The read in an atomic + read-modify-write operation (:ref:`cmpxchg ` and + :ref:`atomicrmw `) reads the value in the modification + order immediately before the value it writes. If one atomic read + happens before another atomic read of the same address, the later + read must see the same value or a later value in the address's + modification order. This disallows reordering of ``monotonic`` (or + stronger) operations on the same address. If an address is written + ``monotonic``-ally by one thread, and other threads ``monotonic``-ally + read that address repeatedly, the other threads must eventually see + the write. This corresponds to the C++0x/C1x + ``memory_order_relaxed``. +``acquire`` + In addition to the guarantees of ``monotonic``, a + *synchronizes-with* edge may be formed with a ``release`` operation. + This is intended to model C++'s ``memory_order_acquire``. +``release`` + In addition to the guarantees of ``monotonic``, if this operation + writes a value which is subsequently read by an ``acquire`` + operation, it *synchronizes-with* that operation. (This isn't a + complete description; see the C++0x definition of a release + sequence.) This corresponds to the C++0x/C1x + ``memory_order_release``. +``acq_rel`` (acquire+release) + Acts as both an ``acquire`` and ``release`` operation on its + address. This corresponds to the C++0x/C1x ``memory_order_acq_rel``. +``seq_cst`` (sequentially consistent) + In addition to the guarantees of ``acq_rel`` (``acquire`` for an + operation which only reads, ``release`` for an operation which only + writes), there is a global total order on all + sequentially-consistent operations on all addresses, which is + consistent with the *happens-before* partial order and with the + modification orders of all the affected addresses. 
Each + sequentially-consistent read sees the last preceding write to the + same address in this global order. This corresponds to the C++0x/C1x + ``memory_order_seq_cst`` and Java volatile. + +.. _singlethread: + +If an atomic operation is marked ``singlethread``, it only *synchronizes +with* or participates in modification and seq\_cst total orderings with +other operations running in the same thread (for example, in signal +handlers). + +.. _fastmath: + +Fast-Math Flags +--------------- + +LLVM IR floating-point binary ops (:ref:`fadd <i_fadd>`, +:ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`, +:ref:`frem <i_frem>`) have the following flags that can be set to enable +otherwise unsafe floating point operations: + +``nnan`` + No NaNs - Allow optimizations to assume the arguments and result are not + NaN. Such optimizations are required to retain defined behavior over + NaNs, but the value of the result is undefined. + +``ninf`` + No Infs - Allow optimizations to assume the arguments and result are not + +/-Inf. Such optimizations are required to retain defined behavior over + +/-Inf, but the value of the result is undefined. + +``nsz`` + No Signed Zeros - Allow optimizations to treat the sign of a zero + argument or result as insignificant. + +``arcp`` + Allow Reciprocal - Allow optimizations to use the reciprocal of an + argument rather than perform division. + +``fast`` + Fast - Allow algebraically equivalent transformations that may + dramatically change results in floating point (e.g. reassociate). This + flag implies all the others. + +.. _typesystem: + +Type System +=========== + +The LLVM type system is one of the most important features of the +intermediate representation. Being typed enables a number of +optimizations to be performed on the intermediate representation +directly, without having to do extra analyses on the side before the +transformation. 
A strong type system makes it easier to read the +generated code and enables novel analyses and transformations that are +not feasible to perform on normal three address code representations. + +Type Classifications +-------------------- + +The types fall into a few useful classifications: + + +.. list-table:: + :header-rows: 1 + + * - Classification + - Types + + * - :ref:`integer ` + - ``i1``, ``i2``, ``i3``, ... ``i8``, ... ``i16``, ... ``i32``, ... + ``i64``, ... + + * - :ref:`floating point ` + - ``half``, ``float``, ``double``, ``x86_fp80``, ``fp128``, + ``ppc_fp128`` + + + * - first class + + .. _t_firstclass: + + - :ref:`integer `, :ref:`floating point `, + :ref:`pointer `, :ref:`vector `, + :ref:`structure `, :ref:`array `, + :ref:`label `, :ref:`metadata `. + + * - :ref:`primitive ` + - :ref:`label `, + :ref:`void `, + :ref:`integer `, + :ref:`floating point `, + :ref:`x86mmx `, + :ref:`metadata `. + + * - :ref:`derived ` + - :ref:`array `, + :ref:`function `, + :ref:`pointer `, + :ref:`structure `, + :ref:`vector `, + :ref:`opaque `. + +The :ref:`first class ` types are perhaps the most important. +Values of these types are the only ones which can be produced by +instructions. + +.. _t_primitive: + +Primitive Types +--------------- + +The primitive types are the fundamental building blocks of the LLVM +system. + +.. _t_integer: + +Integer Type +^^^^^^^^^^^^ + +Overview: +""""""""" + +The integer type is a very simple type that simply specifies an +arbitrary bit width for the integer type desired. Any bit width from 1 +bit to 2\ :sup:`23`\ -1 (about 8 million) can be specified. + +Syntax: +""""""" + +:: + + iN + +The number of bits the integer will occupy is specified by the ``N`` +value. + +Examples: +""""""""" + ++----------------+------------------------------------------------+ +| ``i1`` | a single-bit integer. | ++----------------+------------------------------------------------+ +| ``i32`` | a 32-bit integer. 
| ++----------------+------------------------------------------------+ +| ``i1942652`` | a really big integer of over 1 million bits. | ++----------------+------------------------------------------------+ + +.. _t_floating: + +Floating Point Types +^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :header-rows: 1 + + * - Type + - Description + + * - ``half`` + - 16-bit floating point value + + * - ``float`` + - 32-bit floating point value + + * - ``double`` + - 64-bit floating point value + + * - ``fp128`` + - 128-bit floating point value (112-bit mantissa) + + * - ``x86_fp80`` + - 80-bit floating point value (X87) + + * - ``ppc_fp128`` + - 128-bit floating point value (two 64-bits) + +.. _t_x86mmx: + +X86mmx Type +^^^^^^^^^^^ + +Overview: +""""""""" + +The x86mmx type represents a value held in an MMX register on an x86 +machine. The operations allowed on it are quite limited: parameters and +return values, load and store, and bitcast. User-specified MMX +instructions are represented as intrinsic or asm calls with arguments +and/or results of this type. There are no arrays, vectors or constants +of this type. + +Syntax: +""""""" + +:: + + x86mmx + +.. _t_void: + +Void Type +^^^^^^^^^ + +Overview: +""""""""" + +The void type does not represent any value and has no size. + +Syntax: +""""""" + +:: + + void + +.. _t_label: + +Label Type +^^^^^^^^^^ + +Overview: +""""""""" + +The label type represents code labels. + +Syntax: +""""""" + +:: + + label + +.. _t_metadata: + +Metadata Type +^^^^^^^^^^^^^ + +Overview: +""""""""" + +The metadata type represents embedded metadata. No derived types may be +created from metadata except for :ref:`function ` arguments. + +Syntax: +""""""" + +:: + + metadata + +.. _t_derived: + +Derived Types +------------- + +The real power in LLVM comes from the derived types in the system. This +is what allows a programmer to represent arrays, functions, pointers, +and other useful types. 
Each of these types contains one or more element +types which may be a primitive type, or another derived type. For +example, it is possible to have a two dimensional array, using an array +as the element type of another array. + +.. _t_aggregate: + +Aggregate Types +^^^^^^^^^^^^^^^ + +Aggregate Types are a subset of derived types that can contain multiple +member types. :ref:`Arrays <t_array>` and :ref:`structs <t_struct>` are +aggregate types. :ref:`Vectors <t_vector>` are not considered to be +aggregate types. + +.. _t_array: + +Array Type +^^^^^^^^^^ + +Overview: +""""""""" + +The array type is a very simple derived type that arranges elements +sequentially in memory. The array type requires a size (number of +elements) and an underlying data type. + +Syntax: +""""""" + +:: + + [<# elements> x <elementtype>] + +The number of elements is a constant integer value; ``elementtype`` may +be any type with a size. + +Examples: +""""""""" + ++------------------+--------------------------------------+ +| ``[40 x i32]`` | Array of 40 32-bit integer values. | ++------------------+--------------------------------------+ +| ``[41 x i32]`` | Array of 41 32-bit integer values. | ++------------------+--------------------------------------+ +| ``[4 x i8]`` | Array of 4 8-bit integer values. | ++------------------+--------------------------------------+ + +Here are some examples of multidimensional arrays: + ++-----------------------------+----------------------------------------------------------+ +| ``[3 x [4 x i32]]`` | 3x4 array of 32-bit integer values. | ++-----------------------------+----------------------------------------------------------+ +| ``[12 x [10 x float]]`` | 12x10 array of single precision floating point values. | ++-----------------------------+----------------------------------------------------------+ +| ``[2 x [3 x [4 x i16]]]`` | 2x3x4 array of 16-bit integer values. 
| ++-----------------------------+----------------------------------------------------------+ + +There is no restriction on indexing beyond the end of the array implied +by a static type (though there are restrictions on indexing beyond the +bounds of an allocated object in some cases). This means that +single-dimension 'variable sized array' addressing can be implemented in +LLVM with a zero length array type. An implementation of 'pascal style +arrays' in LLVM could use the type "``{ i32, [0 x float]}``", for +example. + +.. _t_function: + +Function Type +^^^^^^^^^^^^^ + +Overview: +""""""""" + +The function type can be thought of as a function signature. It consists +of a return type and a list of formal parameter types. The return type +of a function type is a first class type or a void type. + +Syntax: +""""""" + +:: + + () + +...where '````' is a comma-separated list of type +specifiers. Optionally, the parameter list may include a type ``...``, +which indicates that the function takes a variable number of arguments. +Variable argument functions can access their arguments with the +:ref:`variable argument handling intrinsic ` functions. +'````' is any type except :ref:`label `. + +Examples: +""""""""" + ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` | ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``float (i16, i32 *) *`` | :ref:`Pointer ` to a function that takes an ``i16`` and a :ref:`pointer ` to ``i32``, returning ``float``. 
| ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer ` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. | ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure ` containing two ``i32`` values | ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +.. _t_struct: + +Structure Type +^^^^^^^^^^^^^^ + +Overview: +""""""""" + +The structure type is used to represent a collection of data members +together in memory. The elements of a structure may be any type that has +a size. + +Structures in memory are accessed using '``load``' and '``store``' by +getting a pointer to a field with the '``getelementptr``' instruction. +Structures in registers are accessed using the '``extractvalue``' and +'``insertvalue``' instructions. + +Structures may optionally be "packed" structures, which indicate that +the alignment of the struct is one byte, and that there is no padding +between the elements. In non-packed structs, padding between field types +is inserted as defined by the DataLayout string in the module, which is +required to match what the underlying code generator expects. + +Structures can either be "literal" or "identified". A literal structure +is defined inline with other types (e.g. ``{i32, i32}*``) whereas +identified types are always defined at the top level with a name. 
+Literal types are uniqued by their contents and can never be recursive +or opaque since there is no way to write one. Identified types can be +recursive, can be opaqued, and are never uniqued. + +Syntax: +""""""" + +:: + + %T1 = type { } ; Identified normal struct type + %T2 = type <{ }> ; Identified packed struct type + +Examples: +""""""""" + ++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values | ++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``{ float, i32 (i32) * }`` | A pair, where the first element is a ``float`` and the second element is a :ref:`pointer ` to a :ref:`function ` that takes an ``i32``, returning an ``i32``. | ++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``<{ i8, i32 }>`` | A packed struct known to be 5 bytes in size. | ++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +.. _t_opaque: + +Opaque Structure Types +^^^^^^^^^^^^^^^^^^^^^^ + +Overview: +""""""""" + +Opaque structure types are used to represent named structure types that +do not have a body specified. This corresponds (for example) to the C +notion of a forward declared structure. + +Syntax: +""""""" + +:: + + %X = type opaque + %52 = type opaque + +Examples: +""""""""" + ++--------------+-------------------+ +| ``opaque`` | An opaque type. 
| ++--------------+-------------------+ + +.. _t_pointer: + +Pointer Type +^^^^^^^^^^^^ + +Overview: +""""""""" + +The pointer type is used to specify memory locations. Pointers are +commonly used to reference objects in memory. + +Pointer types may have an optional address space attribute defining the +numbered address space where the pointed-to object resides. The default +address space is number zero. The semantics of non-zero address spaces +are target-specific. + +Note that LLVM does not permit pointers to void (``void*``) nor does it +permit pointers to labels (``label*``). Use ``i8*`` instead. + +Syntax: +""""""" + +:: + + * + +Examples: +""""""""" + ++-------------------------+--------------------------------------------------------------------------------------------------------------+ +| ``[4 x i32]*`` | A :ref:`pointer ` to :ref:`array ` of four ``i32`` values. | ++-------------------------+--------------------------------------------------------------------------------------------------------------+ +| ``i32 (i32*) *`` | A :ref:`pointer ` to a :ref:`function ` that takes an ``i32*``, returning an ``i32``. | ++-------------------------+--------------------------------------------------------------------------------------------------------------+ +| ``i32 addrspace(5)*`` | A :ref:`pointer ` to an ``i32`` value that resides in address space #5. | ++-------------------------+--------------------------------------------------------------------------------------------------------------+ + +.. _t_vector: + +Vector Type +^^^^^^^^^^^ + +Overview: +""""""""" + +A vector type is a simple derived type that represents a vector of +elements. Vector types are used when multiple primitive data are +operated in parallel using a single instruction (SIMD). A vector type +requires a size (number of elements) and an underlying primitive data +type. Vector types are considered :ref:`first class `. 
+ +Syntax: +""""""" + +:: + + < <# elements> x <elementtype> > + +The number of elements is a constant integer value larger than 0; +elementtype may be any integer or floating point type, or a pointer to +these types. Vectors of size zero are not allowed. + +Examples: +""""""""" + ++-------------------+--------------------------------------------------+ +| ``<4 x i32>`` | Vector of 4 32-bit integer values. | ++-------------------+--------------------------------------------------+ +| ``<8 x float>`` | Vector of 8 32-bit floating-point values. | ++-------------------+--------------------------------------------------+ +| ``<2 x i64>`` | Vector of 2 64-bit integer values. | ++-------------------+--------------------------------------------------+ +| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. | ++-------------------+--------------------------------------------------+ + +Constants +========= + +LLVM has several different basic types of constants. This section +describes them all and their syntax. + +Simple Constants +---------------- + +**Boolean constants** + The two strings '``true``' and '``false``' are both valid constants + of the ``i1`` type. +**Integer constants** + Standard integers (such as '4') are constants of the + :ref:`integer <t_integer>` type. Negative numbers may be used with + integer types. +**Floating point constants** + Floating point constants use standard decimal notation (e.g. + 123.421), exponential notation (e.g. 1.23421e+2), or a more precise + hexadecimal notation (see below). The assembler requires the exact + decimal value of a floating-point constant. For example, the + assembler accepts 1.25 but rejects 1.3 because 1.3 is a repeating + decimal in binary. Floating point constants must have a :ref:`floating + point <t_floating>` type. +**Null pointer constants** + The identifier '``null``' is recognized as a null pointer constant + and must be of :ref:`pointer type <t_pointer>`. 
+ +The one non-intuitive notation for constants is the hexadecimal form of +floating point constants. For example, the form +'``double 0x432ff973cafa8000``' is equivalent to (but harder to read +than) '``double 4.5e+15``'. The only time hexadecimal floating point +constants are required (and the only time that they are generated by the +disassembler) is when a floating point constant must be emitted but it +cannot be represented as a decimal floating point number in a reasonable +number of digits. For example, NaN's, infinities, and other special +values are represented in their IEEE hexadecimal format so that assembly +and disassembly do not cause any bits to change in the constants. + +When using the hexadecimal form, constants of types half, float, and +double are represented using the 16-digit form shown above (which +matches the IEEE754 representation for double); half and float values +must, however, be exactly representable as IEEE 754 half and single +precision, respectively. Hexadecimal format is always used for long +double, and there are three forms of long double. The 80-bit format used +by x86 is represented as ``0xK`` followed by 20 hexadecimal digits. The +128-bit format used by PowerPC (two adjacent doubles) is represented by +``0xM`` followed by 32 hexadecimal digits. The IEEE 128-bit format is +represented by ``0xL`` followed by 32 hexadecimal digits; no currently +supported target uses this format. Long doubles will only work if they +match the long double format on your target. The IEEE 16-bit format +(half precision) is represented by ``0xH`` followed by 4 hexadecimal +digits. All hexadecimal formats are big-endian (sign bit at the left). + +There are no constants of type x86mmx. + +Complex Constants +----------------- + +Complex constants are a (potentially recursive) combination of simple +constants and smaller complex constants. 
+ +**Structure constants** + Structure constants are represented with notation similar to + structure type definitions (a comma separated list of elements, + surrounded by braces (``{}``)). For example: + "``{ i32 4, float 17.0, i32* @G }``", where "``@G``" is declared as + "``@G = external global i32``". Structure constants must have + :ref:`structure type `, and the number and types of elements + must match those specified by the type. +**Array constants** + Array constants are represented with notation similar to array type + definitions (a comma separated list of elements, surrounded by + square brackets (``[]``)). For example: + "``[ i32 42, i32 11, i32 74 ]``". Array constants must have + :ref:`array type `, and the number and types of elements must + match those specified by the type. +**Vector constants** + Vector constants are represented with notation similar to vector + type definitions (a comma separated list of elements, surrounded by + less-than/greater-than's (``<>``)). For example: + "``< i32 42, i32 11, i32 74, i32 100 >``". Vector constants + must have :ref:`vector type `, and the number and types of + elements must match those specified by the type. +**Zero initialization** + The string '``zeroinitializer``' can be used to zero initialize a + value to zero of *any* type, including scalar and + :ref:`aggregate ` types. This is often used to avoid + having to print large zero initializers (e.g. for large arrays) and + is always exactly equivalent to using explicit zero initializers. +**Metadata node** + A metadata node is a structure-like constant with :ref:`metadata + type `. For example: + "``metadata !{ i32 0, metadata !"test" }``". Unlike other + constants that are meant to be interpreted as part of the + instruction stream, metadata is a place to attach additional + information such as debug info. 
+ +Global Variable and Function Addresses +-------------------------------------- + +The addresses of :ref:`global variables ` and +:ref:`functions ` are always implicitly valid +(link-time) constants. These constants are explicitly referenced when +the :ref:`identifier for the global ` is used and always have +:ref:`pointer ` type. For example, the following is a legal LLVM +file: + +.. code-block:: llvm + + @X = global i32 17 + @Y = global i32 42 + @Z = global [2 x i32*] [ i32* @X, i32* @Y ] + +.. _undefvalues: + +Undefined Values +---------------- + +The string '``undef``' can be used anywhere a constant is expected, and +indicates that the user of the value may receive an unspecified +bit-pattern. Undefined values may be of any type (other than '``label``' +or '``void``') and be used anywhere a constant is permitted. + +Undefined values are useful because they indicate to the compiler that +the program is well defined no matter what value is used. This gives the +compiler more freedom to optimize. Here are some examples of +(potentially surprising) transformations that are valid (in pseudo IR): + +.. code-block:: llvm + + %A = add %X, undef + %B = sub %X, undef + %C = xor %X, undef + Safe: + %A = undef + %B = undef + %C = undef + +This is safe because all of the output bits are affected by the undef +bits. Any output bit can have a zero or one depending on the input bits. + +.. code-block:: llvm + + %A = or %X, undef + %B = and %X, undef + Safe: + %A = -1 + %B = 0 + Unsafe: + %A = undef + %B = undef + +These logical operations have bits that are not always affected by the +input. For example, if ``%X`` has a zero bit, then the output of the +'``and``' operation will always be a zero for that bit, no matter what +the corresponding bit from the '``undef``' is. As such, it is unsafe to +optimize or assume that the result of the '``and``' is '``undef``'. +However, it is safe to assume that all bits of the '``undef``' could be +0, and optimize the '``and``' to 0. 
Likewise, it is safe to assume that +all the bits of the '``undef``' operand to the '``or``' could be set, +allowing the '``or``' to be folded to -1. + +.. code-block:: llvm + + %A = select undef, %X, %Y + %B = select undef, 42, %Y + %C = select %X, %Y, undef + Safe: + %A = %X (or %Y) + %B = 42 (or %Y) + %C = %Y + Unsafe: + %A = undef + %B = undef + %C = undef + +This set of examples shows that undefined '``select``' (and conditional +branch) conditions can go *either way*, but they have to come from one +of the two operands. In the ``%A`` example, if ``%X`` and ``%Y`` were +both known to have a clear low bit, then ``%A`` would have to have a +cleared low bit. However, in the ``%C`` example, the optimizer is +allowed to assume that the '``undef``' operand could be the same as +``%Y``, allowing the whole '``select``' to be eliminated. + +.. code-block:: llvm + + %A = xor undef, undef + + %B = undef + %C = xor %B, %B + + %D = undef + %E = icmp lt %D, 4 + %F = icmp gte %D, 4 + + Safe: + %A = undef + %B = undef + %C = undef + %D = undef + %E = undef + %F = undef + +This example points out that two '``undef``' operands are not +necessarily the same. This can be surprising to people (and also matches +C semantics) where they assume that "``X^X``" is always zero, even if +``X`` is undefined. This isn't true for a number of reasons, but the +short answer is that an '``undef``' "variable" can arbitrarily change +its value over its "live range". This is true because the variable +doesn't actually *have a live range*. Instead, the value is logically +read from arbitrary registers that happen to be around when needed, so +the value is not necessarily consistent over time. In fact, ``%A`` and +``%C`` need to have the same semantics or the core LLVM "replace all +uses with" concept would not hold. + +.. 
code-block:: llvm + + %A = fdiv undef, %X + %B = fdiv %X, undef + Safe: + %A = undef + b: unreachable + +These examples show the crucial difference between an *undefined value* +and *undefined behavior*. An undefined value (like '``undef``') is +allowed to have an arbitrary bit-pattern. This means that the ``%A`` +operation can be constant folded to '``undef``', because the '``undef``' +could be an SNaN, and ``fdiv`` is not (currently) defined on SNaN's. +However, in the second example, we can make a more aggressive +assumption: because the ``undef`` is allowed to be an arbitrary value, +we are allowed to assume that it could be zero. Since a divide by zero +has *undefined behavior*, we are allowed to assume that the operation +does not execute at all. This allows us to delete the divide and all +code after it. Because the undefined operation "can't happen", the +optimizer can assume that it occurs in dead code. + +.. code-block:: llvm + + a: store undef -> %X + b: store %X -> undef + Safe: + a: + b: unreachable + +These examples reiterate the ``fdiv`` example: a store *of* an undefined +value can be assumed to not have any effect; we can assume that the +value is overwritten with bits that happen to match what was already +there. However, a store *to* an undefined location could clobber +arbitrary memory, therefore, it has undefined behavior. + +.. _poisonvalues: + +Poison Values +------------- + +Poison values are similar to :ref:`undef values `, however +they also represent the fact that an instruction or constant expression +which cannot evoke side effects has nevertheless detected a condition +which results in undefined behavior. + +There is currently no way of representing a poison value in the IR; they +only exist when produced by operations such as :ref:`add ` with +the ``nsw`` flag. + +Poison value behavior is defined in terms of value *dependence*: + +- Values other than :ref:`phi ` nodes depend on their operands. 
+- :ref:`Phi ` nodes depend on the operand corresponding to + their dynamic predecessor basic block. +- Function arguments depend on the corresponding actual argument values + in the dynamic callers of their functions. +- :ref:`Call ` instructions depend on the :ref:`ret ` + instructions that dynamically transfer control back to them. +- :ref:`Invoke ` instructions depend on the + :ref:`ret `, :ref:`resume `, or exception-throwing + call instructions that dynamically transfer control back to them. +- Non-volatile loads and stores depend on the most recent stores to all + of the referenced memory addresses, following the order in the IR + (including loads and stores implied by intrinsics such as + :ref:`@llvm.memcpy `.) +- An instruction with externally visible side effects depends on the + most recent preceding instruction with externally visible side + effects, following the order in the IR. (This includes :ref:`volatile + operations `.) +- An instruction *control-depends* on a :ref:`terminator + instruction ` if the terminator instruction has + multiple successors and the instruction is always executed when + control transfers to one of the successors, and may not be executed + when control is transferred to another. +- Additionally, an instruction also *control-depends* on a terminator + instruction if the set of instructions it otherwise depends on would + be different if the terminator had transferred control to a different + successor. +- Dependence is transitive. + +Poison Values have the same behavior as :ref:`undef values `, +with the additional effect that any instruction which has a *dependence* +on a poison value has undefined behavior. + +Here are some examples: + +.. code-block:: llvm + + entry: + %poison = sub nuw i32 0, 1 ; Results in a poison value. + %still_poison = and i32 %poison, 0 ; 0, but also poison. 
+ %poison_yet_again = getelementptr i32* @h, i32 %still_poison + store i32 0, i32* %poison_yet_again ; memory at @h[0] is poisoned + + store i32 %poison, i32* @g ; Poison value stored to memory. + %poison2 = load i32* @g ; Poison value loaded back from memory. + + store volatile i32 %poison, i32* @g ; External observation; undefined behavior. + + %narrowaddr = bitcast i32* @g to i16* + %wideaddr = bitcast i32* @g to i64* + %poison3 = load i16* %narrowaddr ; Returns a poison value. + %poison4 = load i64* %wideaddr ; Returns a poison value. + + %cmp = icmp slt i32 %poison, 0 ; Returns a poison value. + br i1 %cmp, label %true, label %end ; Branch to either destination. + + true: + store volatile i32 0, i32* @g ; This is control-dependent on %cmp, so + ; it has undefined behavior. + br label %end + + end: + %p = phi i32 [ 0, %entry ], [ 1, %true ] + ; Both edges into this PHI are + ; control-dependent on %cmp, so this + ; always results in a poison value. + + store volatile i32 0, i32* @g ; This would depend on the store in %true + ; if %cmp is true, or the store in %entry + ; otherwise, so this is undefined behavior. + + br i1 %cmp, label %second_true, label %second_end + ; The same branch again, but this time the + ; true block doesn't have side effects. + + second_true: + ; No side effects! + ret void + + second_end: + store volatile i32 0, i32* @g ; This time, the instruction always depends + ; on the store in %end. Also, it is + ; control-equivalent to %end, so this is + ; well-defined (ignoring earlier undefined + ; behavior in this example). + +.. _blockaddress: + +Addresses of Basic Blocks +------------------------- + +``blockaddress(@function, %block)`` + +The '``blockaddress``' constant computes the address of the specified +basic block in the specified function, and always has an ``i8*`` type. +Taking the address of the entry block is illegal. 
+ +This value only has defined behavior when used as an operand to the +':ref:`indirectbr `' instruction, or for comparisons +against null. Pointer equality tests between label addresses result in +undefined behavior --- though, again, comparison against null is ok, and +no label is equal to the null pointer. This may be passed around as an +opaque pointer sized value as long as the bits are not inspected. This +allows ``ptrtoint`` and arithmetic to be performed on these values so +long as the original value is reconstituted before the ``indirectbr`` +instruction. + +Finally, some targets may provide defined semantics when using the value +as the operand to an inline assembly, but that is target specific. + +Constant Expressions +-------------------- + +Constant expressions are used to allow expressions involving other +constants to be used as constants. Constant expressions may be of any +:ref:`first class ` type and may involve any LLVM operation +that does not have side effects (e.g. load and call are not supported). +The following is the syntax for constant expressions: + +``trunc (CST to TYPE)`` + Truncate a constant to another type. The bit size of CST must be + larger than the bit size of TYPE. Both types must be integers. +``zext (CST to TYPE)`` + Zero extend a constant to another type. The bit size of CST must be + smaller than the bit size of TYPE. Both types must be integers. +``sext (CST to TYPE)`` + Sign extend a constant to another type. The bit size of CST must be + smaller than the bit size of TYPE. Both types must be integers. +``fptrunc (CST to TYPE)`` + Truncate a floating point constant to another floating point type. + The size of CST must be larger than the size of TYPE. Both types + must be floating point. +``fpext (CST to TYPE)`` + Floating point extend a constant to another type. The size of CST + must be smaller than or equal to the size of TYPE. Both types must be + floating point. 
+``fptoui (CST to TYPE)`` + Convert a floating point constant to the corresponding unsigned + integer constant. TYPE must be a scalar or vector integer type. CST + must be of scalar or vector floating point type. Both CST and TYPE + must be scalars, or vectors of the same number of elements. If the + value won't fit in the integer type, the results are undefined. +``fptosi (CST to TYPE)`` + Convert a floating point constant to the corresponding signed + integer constant. TYPE must be a scalar or vector integer type. CST + must be of scalar or vector floating point type. Both CST and TYPE + must be scalars, or vectors of the same number of elements. If the + value won't fit in the integer type, the results are undefined. +``uitofp (CST to TYPE)`` + Convert an unsigned integer constant to the corresponding floating + point constant. TYPE must be a scalar or vector floating point type. + CST must be of scalar or vector integer type. Both CST and TYPE must + be scalars, or vectors of the same number of elements. If the value + won't fit in the floating point type, the results are undefined. +``sitofp (CST to TYPE)`` + Convert a signed integer constant to the corresponding floating + point constant. TYPE must be a scalar or vector floating point type. + CST must be of scalar or vector integer type. Both CST and TYPE must + be scalars, or vectors of the same number of elements. If the value + won't fit in the floating point type, the results are undefined. +``ptrtoint (CST to TYPE)`` + Convert a pointer typed constant to the corresponding integer + constant. ``TYPE`` must be an integer type. ``CST`` must be of + pointer type. The ``CST`` value is zero extended, truncated, or + unchanged to make it fit in ``TYPE``. +``inttoptr (CST to TYPE)`` + Convert an integer constant to a pointer constant. TYPE must be a + pointer type. CST must be of integer type. The CST value is zero + extended, truncated, or unchanged to make it fit in a pointer size. 
+ This one is *really* dangerous! +``bitcast (CST to TYPE)`` + Convert a constant, CST, to another TYPE. The constraints of the + operands are the same as those for the :ref:`bitcast + instruction `. +``getelementptr (CSTPTR, IDX0, IDX1, ...)``, ``getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)`` + Perform the :ref:`getelementptr operation ` on + constants. As with the :ref:`getelementptr ` + instruction, the index list may have zero or more indexes, which are + required to make sense for the type of "CSTPTR". +``select (COND, VAL1, VAL2)`` + Perform the :ref:`select operation ` on constants. +``icmp COND (VAL1, VAL2)`` + Performs the :ref:`icmp operation ` on constants. +``fcmp COND (VAL1, VAL2)`` + Performs the :ref:`fcmp operation ` on constants. +``extractelement (VAL, IDX)`` + Perform the :ref:`extractelement operation ` on + constants. +``insertelement (VAL, ELT, IDX)`` + Perform the :ref:`insertelement operation ` on + constants. +``shufflevector (VEC1, VEC2, IDXMASK)`` + Perform the :ref:`shufflevector operation ` on + constants. +``extractvalue (VAL, IDX0, IDX1, ...)`` + Perform the :ref:`extractvalue operation ` on + constants. The index list is interpreted in a similar manner as + indices in a ':ref:`getelementptr `' operation. At + least one index value must be specified. +``insertvalue (VAL, ELT, IDX0, IDX1, ...)`` + Perform the :ref:`insertvalue operation ` on constants. + The index list is interpreted in a similar manner as indices in a + ':ref:`getelementptr `' operation. At least one index + value must be specified. +``OPCODE (LHS, RHS)`` + Perform the specified operation of the LHS and RHS constants. OPCODE + may be any of the :ref:`binary ` or :ref:`bitwise + binary ` operations. The constraints on operands are + the same as those for the corresponding instruction (e.g. no bitwise + operations on floating point values are allowed). 
+ +Other Values +============ + +Inline Assembler Expressions +---------------------------- + +LLVM supports inline assembler expressions (as opposed to :ref:`Module-Level +Inline Assembly `) through the use of a special value. This +value represents the inline assembler as a string (containing the +instructions to emit), a list of operand constraints (stored as a +string), a flag that indicates whether or not the inline asm expression +has side effects, and a flag indicating whether the function containing +the asm needs to align its stack conservatively. An example inline +assembler expression is: + +.. code-block:: llvm + + i32 (i32) asm "bswap $0", "=r,r" + +Inline assembler expressions may **only** be used as the callee operand +of a :ref:`call ` or an :ref:`invoke ` instruction. +Thus, typically we have: + +.. code-block:: llvm + + %X = call i32 asm "bswap $0", "=r,r"(i32 %Y) + +Inline asms with side effects not visible in the constraint list must be +marked as having side effects. This is done through the use of the +'``sideeffect``' keyword, like so: + +.. code-block:: llvm + + call void asm sideeffect "eieio", ""() + +In some cases inline asms will contain code that will not work unless +the stack is aligned in some way, such as calls or SSE instructions on +x86, yet will not contain code that does that alignment within the asm. +The compiler should make conservative assumptions about what the asm +might contain and should generate its usual stack alignment code in the +prologue if the '``alignstack``' keyword is present: + +.. code-block:: llvm + + call void asm alignstack "eieio", ""() + +Inline asms also support using non-standard assembly dialects. The +assumed dialect is ATT. When the '``inteldialect``' keyword is present, +the inline asm is using the Intel dialect. Currently, ATT and Intel are +the only supported dialects. An example is: + +.. 
code-block:: llvm + + call void asm inteldialect "eieio", ""() + +If multiple keywords appear the '``sideeffect``' keyword must come +first, the '``alignstack``' keyword second and the '``inteldialect``' +keyword last. + +Inline Asm Metadata +^^^^^^^^^^^^^^^^^^^ + +The call instructions that wrap inline asm nodes may have a +"``!srcloc``" MDNode attached to them that contains a list of constant +integers. If present, the code generator will use the integer as the +location cookie value when reporting errors through the ``LLVMContext`` +error reporting mechanisms. This allows a front-end to correlate backend +errors that occur with inline asm back to the source code that produced +it. For example: + +.. code-block:: llvm + + call void asm sideeffect "something bad", ""(), !srcloc !42 + ... + !42 = !{ i32 1234567 } + +It is up to the front-end to make sense of the magic numbers it places +in the IR. If the MDNode contains multiple constants, the code generator +will use the one that corresponds to the line of the asm that the error +occurs on. + +.. _metadata: + +Metadata Nodes and Metadata Strings +----------------------------------- + +LLVM IR allows metadata to be attached to instructions in the program +that can convey extra information about the code to the optimizers and +code generator. One example application of metadata is source-level +debug information. There are two metadata primitives: strings and nodes. +All metadata has the ``metadata`` type and is identified in syntax by a +preceding exclamation point ('``!``'). + +A metadata string is a string surrounded by double quotes. It can +contain any character by escaping non-printable characters with +"``\xx``" where "``xx``" is the two digit hex code. For example: +"``!"test\00"``". + +Metadata nodes are represented with notation similar to structure +constants (a comma separated list of elements, surrounded by braces and +preceded by an exclamation point). Metadata nodes can have any values as +their operands. 
For example: + +.. code-block:: llvm + + !{ metadata !"test\00", i32 10} + +A :ref:`named metadata ` is a collection of +metadata nodes, which can be looked up in the module symbol table. For +example: + +.. code-block:: llvm + + !foo = metadata !{!4, !3} + +Metadata can be used as function arguments. Here ``llvm.dbg.value`` +function is using two metadata arguments: + +.. code-block:: llvm + + call void @llvm.dbg.value(metadata !24, i64 0, metadata !25) + +Metadata can be attached with an instruction. Here metadata ``!21`` is +attached to the ``add`` instruction using the ``!dbg`` identifier: + +.. code-block:: llvm + + %indvar.next = add i64 %indvar, 1, !dbg !21 + +More information about specific metadata nodes recognized by the +optimizers and code generator is found below. + +'``tbaa``' Metadata +^^^^^^^^^^^^^^^^^^^ + +In LLVM IR, memory does not have types, so LLVM's own type system is not +suitable for doing TBAA. Instead, metadata is added to the IR to +describe a type system of a higher level language. This can be used to +implement typical C/C++ TBAA, but it can also be used to implement +custom alias analysis behavior for other languages. + +The current metadata format is very simple. TBAA metadata nodes have up +to three fields, e.g.: + +.. code-block:: llvm + + !0 = metadata !{ metadata !"an example type tree" } + !1 = metadata !{ metadata !"int", metadata !0 } + !2 = metadata !{ metadata !"float", metadata !0 } + !3 = metadata !{ metadata !"const float", metadata !2, i64 1 } + +The first field is an identity field. It can be any value, usually a +metadata string, which uniquely identifies the type. The most important +name in the tree is the name of the root node. Two trees with different +root node names are entirely disjoint, even if they have leaves with +common names. + +The second field identifies the type's parent node in the tree, or is +null or omitted for a root node. 
A type is considered to alias all of +its descendants and all of its ancestors in the tree. Also, a type is +considered to alias all types in other trees, so that bitcode produced +from multiple front-ends is handled conservatively. + +If the third field is present, it's an integer which if equal to 1 +indicates that the type is "constant" (meaning +``pointsToConstantMemory`` should return true; see `other useful +AliasAnalysis methods `_). + +'``tbaa.struct``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :ref:`llvm.memcpy ` is often used to implement +aggregate assignment operations in C and similar languages, however it +is defined to copy a contiguous region of memory, which is more than +strictly necessary for aggregate types which contain holes due to +padding. Also, it doesn't contain any TBAA information about the fields +of the aggregate. + +``!tbaa.struct`` metadata can describe which memory subregions in a +memcpy are padding and what the TBAA tags of the struct are. + +The current metadata format is very simple. ``!tbaa.struct`` metadata +nodes are a list of operands which are in conceptual groups of three. +For each group of three, the first operand gives the byte offset of a +field in bytes, the second gives its size in bytes, and the third gives +its tbaa tag. e.g.: + +.. code-block:: llvm + + !4 = metadata !{ i64 0, i64 4, metadata !1, i64 8, i64 4, metadata !2 } + +This describes a struct with two fields. The first is at offset 0 bytes +with size 4 bytes, and has tbaa tag !1. The second is at offset 8 bytes +and has size 4 bytes and has tbaa tag !2. + +Note that the fields need not be contiguous. In this example, there is a +4 byte gap between the two fields. This gap represents padding which +does not carry useful data and need not be preserved. + +'``fpmath``' Metadata +^^^^^^^^^^^^^^^^^^^^^ + +``fpmath`` metadata may be attached to any instruction of floating point +type. 
It can be used to express the maximum acceptable error in the +result of that instruction, in ULPs, thus potentially allowing the +compiler to use a more efficient but less accurate method of computing +it. ULP is defined as follows: + + If ``x`` is a real number that lies between two finite consecutive + floating-point numbers ``a`` and ``b``, without being equal to one + of them, then ``ulp(x) = |b - a|``, otherwise ``ulp(x)`` is the + distance between the two non-equal finite floating-point numbers + nearest ``x``. Moreover, ``ulp(NaN)`` is ``NaN``. + +The metadata node shall consist of a single positive floating point +number representing the maximum relative error, for example: + +.. code-block:: llvm + + !0 = metadata !{ float 2.5 } ; maximum acceptable inaccuracy is 2.5 ULPs + +'``range``' Metadata +^^^^^^^^^^^^^^^^^^^^ + +``range`` metadata may be attached only to loads of integer types. It +expresses the possible ranges the loaded value is in. The ranges are +represented with a flattened list of integers. The loaded value is known +to be in the union of the ranges defined by each consecutive pair. Each +pair has the following properties: + +- The type must match the type loaded by the instruction. +- The pair ``a,b`` represents the range ``[a,b)``. +- Both ``a`` and ``b`` are constants. +- The range is allowed to wrap. +- The range should not represent the full or empty set. That is, + ``a!=b``. + +In addition, the pairs must be in signed order of the lower bound and +they must be non-contiguous. + +Examples: + +.. code-block:: llvm + + %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1 + %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1 + %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5 + %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5 + ... 
+ !0 = metadata !{ i8 0, i8 2 } + !1 = metadata !{ i8 255, i8 2 } + !2 = metadata !{ i8 0, i8 2, i8 3, i8 6 } + !3 = metadata !{ i8 -2, i8 0, i8 3, i8 6 } + +'``llvm.loop``' +^^^^^^^^^^^^^^^ + +It is sometimes useful to attach information to loop constructs. Currently, +loop metadata is implemented as metadata attached to the branch instruction +in the loop latch block. This type of metadata refers to a metadata node that is +guaranteed to be separate for each loop. The loop-level metadata is prefixed +with ``llvm.loop``. + +The loop identifier metadata is implemented using a metadata that refers to +itself to avoid merging it with any other identifier metadata, e.g., +during module linkage or function inlining. That is, each loop should refer +to its own identification metadata even if the loops reside in separate functions. +The following example contains loop identifier metadata for two separate loop +constructs: + +.. code-block:: llvm + + !0 = metadata !{ metadata !0 } + !1 = metadata !{ metadata !1 } + + +'``llvm.loop.parallel``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This loop metadata can be used to communicate that a loop should be considered +a parallel loop. The semantics of parallel loops in this case is the one +with the strongest cross-iteration instruction ordering freedom: the +iterations in the loop can be considered completely independent of each +other (also known as embarrassingly parallel loops). + +This metadata can originate from a programming language with parallel loop +constructs. In such a case it is completely the programmer's responsibility +to ensure the instructions from the different iterations of the loop can be +executed in an arbitrary order, in parallel, or intertwined. No loop-carried +dependency checking at all must be expected from the compiler. 
+ +In order to fulfill the LLVM requirement for metadata to be safely ignored, +it is important to ensure that a parallel loop is converted to +a sequential loop in case an optimization (agnostic of the parallel loop +semantics) converts the loop back to such. This happens when new memory +accesses that do not fulfill the requirement of free ordering across iterations +are added to the loop. Therefore, this metadata is required, but not +sufficient, to consider the loop at hand a parallel loop. For a loop +to be parallel, all its memory accessing instructions need to be +marked with the ``llvm.mem.parallel_loop_access`` metadata that refers +to the same loop identifier metadata that identifies the loop at hand. + +'``llvm.mem``' +^^^^^^^^^^^^^^^ + +Metadata types used to annotate memory accesses with information helpful +for optimizations are prefixed with ``llvm.mem``. + +'``llvm.mem.parallel_loop_access``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For a loop to be parallel, in addition to using +the ``llvm.loop.parallel`` metadata to mark the loop latch branch instruction, +also all of the memory accessing instructions in the loop body need to be +marked with the ``llvm.mem.parallel_loop_access`` metadata. If there +is at least one memory accessing instruction not marked with the metadata, +the loop, despite it possibly using the ``llvm.loop.parallel`` metadata, +must be considered a sequential loop. This causes parallel loops to be +converted to sequential loops due to optimization passes that are unaware of +the parallel semantics and that insert new memory instructions to the loop +body. + +Example of a loop that is considered parallel due to its correct use of +both ``llvm.loop.parallel`` and ``llvm.mem.parallel_loop_access`` +metadata types that refer to the same loop identifier metadata. + +.. code-block:: llvm + + for.body: + ... + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0 + ... 
+ store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0 + ... + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !0 + + for.end: + ... + !0 = metadata !{ metadata !0 } + +It is also possible to have nested parallel loops. In that case the +memory accesses refer to a list of loop identifier metadata nodes instead of +the loop identifier metadata node directly: + +.. code-block:: llvm + + outer.for.body: + ... + + inner.for.body: + ... + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0 + ... + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0 + ... + br i1 %exitcond, label %inner.for.end, label %inner.for.body, !llvm.loop.parallel !1 + + inner.for.end: + ... + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0 + ... + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0 + ... + br i1 %exitcond, label %outer.for.end, label %outer.for.body, !llvm.loop.parallel !2 + + outer.for.end: ; preds = %for.body + ... + !0 = metadata !{ metadata !1, metadata !2 } ; a list of parallel loop identifiers + !1 = metadata !{ metadata !1 } ; an identifier for the inner parallel loop + !2 = metadata !{ metadata !2 } ; an identifier for the outer parallel loop + + +Module Flags Metadata +===================== + +Information about the module as a whole is difficult to convey to LLVM's +subsystems. The LLVM IR isn't sufficient to transmit this information. +The ``llvm.module.flags`` named metadata exists in order to facilitate +this. These flags are in the form of key / value pairs --- much like a +dictionary --- making it easy for any subsystem who cares about a flag to +look it up. + +The ``llvm.module.flags`` metadata contains a list of metadata triplets. 
+Each triplet has the following form: + +- The first element is a *behavior* flag, which specifies the behavior + when two (or more) modules are merged together, and it encounters two + (or more) metadata with the same ID. The supported behaviors are + described below. +- The second element is a metadata string that is a unique ID for the + metadata. Each module may only have one flag entry for each unique ID (not + including entries with the **Require** behavior). +- The third element is the value of the flag. + +When two (or more) modules are merged together, the resulting +``llvm.module.flags`` metadata is the union of the modules' flags. That is, for +each unique metadata ID string, there will be exactly one entry in the merged +module's ``llvm.module.flags`` metadata table, and the value for that entry will +be determined by the merge behavior flag, as described below. The only exception +is that entries with the *Require* behavior are always preserved. + +The following behaviors are supported: + +.. list-table:: + :header-rows: 1 + :widths: 10 90 + + * - Value + - Behavior + + * - 1 + - **Error** + Emits an error if two values disagree, otherwise the resulting value + is that of the operands. + + * - 2 + - **Warning** + Emits a warning if two values disagree. The result value will be the + operand for the flag from the first module being linked. + + * - 3 + - **Require** + Adds a requirement that another module flag be present and have a + specified value after linking is performed. The value must be a + metadata pair, where the first element of the pair is the ID of the + module flag to be restricted, and the second element of the pair is + the value the module flag should be restricted to. This behavior can + be used to restrict the allowable results (via triggering of an + error) of linking IDs with the **Override** behavior. + + * - 4 + - **Override** + Uses the specified value, regardless of the behavior or value of the + other module. 
If both modules specify **Override**, but the values + differ, an error will be emitted. + + * - 5 + - **Append** + Appends the two values, which are required to be metadata nodes. + + * - 6 + - **AppendUnique** + Appends the two values, which are required to be metadata + nodes. However, duplicate entries in the second list are dropped + during the append operation. + +It is an error for a particular unique flag ID to have multiple behaviors, +except in the case of **Require** (which adds restrictions on another metadata +value) or **Override**. + +An example of module flags: + +.. code-block:: llvm + + !0 = metadata !{ i32 1, metadata !"foo", i32 1 } + !1 = metadata !{ i32 4, metadata !"bar", i32 37 } + !2 = metadata !{ i32 2, metadata !"qux", i32 42 } + !3 = metadata !{ i32 3, metadata !"qux", + metadata !{ + metadata !"foo", i32 1 + } + } + !llvm.module.flags = !{ !0, !1, !2, !3 } + +- Metadata ``!0`` has the ID ``!"foo"`` and the value '1'. The behavior + if two or more ``!"foo"`` flags are seen is to emit an error if their + values are not equal. + +- Metadata ``!1`` has the ID ``!"bar"`` and the value '37'. The + behavior if two or more ``!"bar"`` flags are seen is to use the value + '37'. + +- Metadata ``!2`` has the ID ``!"qux"`` and the value '42'. The + behavior if two or more ``!"qux"`` flags are seen is to emit a + warning if their values are not equal. + +- Metadata ``!3`` has the ID ``!"qux"`` and the value: + + :: + + metadata !{ metadata !"foo", i32 1 } + + The behavior is to emit an error if the ``llvm.module.flags`` does not + contain a flag with the ID ``!"foo"`` that has the value '1' after linking is + performed. + +Objective-C Garbage Collection Module Flags Metadata +---------------------------------------------------- + +On the Mach-O platform, Objective-C stores metadata about garbage +collection in a special section called "image info". 
The metadata +consists of a version number and a bitmask specifying what types of +garbage collection are supported (if any) by the file. If two or more +modules are linked together their garbage collection metadata needs to +be merged rather than appended together. + +The Objective-C garbage collection module flags metadata consists of the +following key-value pairs: + +.. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Key + - Value + + * - ``Objective-C Version`` + - **[Required]** --- The Objective-C ABI version. Valid values are 1 and 2. + + * - ``Objective-C Image Info Version`` + - **[Required]** --- The version of the image info section. Currently + always 0. + + * - ``Objective-C Image Info Section`` + - **[Required]** --- The section to place the metadata. Valid values are + ``"__OBJC, __image_info, regular"`` for Objective-C ABI version 1, and + ``"__DATA,__objc_imageinfo, regular, no_dead_strip"`` for + Objective-C ABI version 2. + + * - ``Objective-C Garbage Collection`` + - **[Required]** --- Specifies whether garbage collection is supported or + not. Valid values are 0, for no garbage collection, and 2, for garbage + collection supported. + + * - ``Objective-C GC Only`` + - **[Optional]** --- Specifies that only garbage collection is supported. + If present, its value must be 6. This flag requires that the + ``Objective-C Garbage Collection`` flag have the value 2. + +Some important flag interactions: + +- If a module with ``Objective-C Garbage Collection`` set to 0 is + merged with a module with ``Objective-C Garbage Collection`` set to + 2, then the resulting module has the + ``Objective-C Garbage Collection`` flag set to 0. +- A module with ``Objective-C Garbage Collection`` set to 0 cannot be + merged with a module with ``Objective-C GC Only`` set to 6. + +Automatic Linker Flags Module Flags Metadata +-------------------------------------------- + +Some targets support embedding flags to the linker inside individual object +files. 
Typically this is used in conjunction with language extensions which +allow source files to explicitly declare the libraries they depend on, and have +these automatically be transmitted to the linker via object files. + +These flags are encoded in the IR using metadata in the module flags section, +using the ``Linker Options`` key. The merge behavior for this flag is required +to be ``AppendUnique``, and the value for the key is expected to be a metadata +node which should be a list of other metadata nodes, each of which should be a +list of metadata strings defining linker options. + +For example, the following metadata section specifies two separate sets of +linker options, presumably to link against ``libz`` and the ``Cocoa`` +framework:: + + !0 = metadata !{ i32 6, metadata !"Linker Options", + metadata !{ + metadata !{ metadata !"-lz" }, + metadata !{ metadata !"-framework", metadata !"Cocoa" } } } + !llvm.module.flags = !{ !0 } + +The metadata encoding as lists of lists of options, as opposed to a collapsed +list of options, is chosen so that the IR encoding can use multiple option +strings to specify e.g., a single library, while still having that specifier be +preserved as an atomic element that can be recognized by a target specific +assembly writer or object file emitter. + +Each individual option is required to be either a valid option for the target's +linker, or an option that is reserved by the target specific assembly writer or +object file emitter. No other aspect of these options is defined by the IR. + +Intrinsic Global Variables +========================== + +LLVM has a number of "magic" global variables that contain data that +affect code generation or other IR semantics. These are documented here. +All globals of this sort should have a section specified as +"``llvm.metadata``". This section and all globals that start with +"``llvm.``" are reserved for use by LLVM. 
+ +The '``llvm.used``' Global Variable +----------------------------------- + +The ``@llvm.used`` global is an array with i8\* element type which has +:ref:`appending linkage `. This array contains a list of +pointers to global variables and functions which may optionally have a +pointer cast formed of bitcast or getelementptr. For example, a legal +use of it is: + +.. code-block:: llvm + + @X = global i8 4 + @Y = global i32 123 + + @llvm.used = appending global [2 x i8*] [ + i8* @X, + i8* bitcast (i32* @Y to i8*) + ], section "llvm.metadata" + +If a global variable appears in the ``@llvm.used`` list, then the +compiler, assembler, and linker are required to treat the symbol as if +there is a reference to the global that it cannot see. For example, if a +variable has internal linkage and no references other than that from the +``@llvm.used`` list, it cannot be deleted. This is commonly used to +represent references from inline asms and other things the compiler +cannot "see", and corresponds to "``__attribute__((used))``" in GNU C. + +On some targets, the code generator must emit a directive to the +assembler or object file to prevent the assembler and linker from +molesting the symbol. + +The '``llvm.compiler.used``' Global Variable +-------------------------------------------- + +The ``@llvm.compiler.used`` directive is the same as the ``@llvm.used`` +directive, except that it only prevents the compiler from touching the +symbol. On targets that support it, this allows an intelligent linker to +optimize references to the symbol without being impeded as it would be +by ``@llvm.used``. + +This is a rare construct that should only be used in rare circumstances, +and should not be exposed to source languages. + +The '``llvm.global_ctors``' Global Variable +------------------------------------------- + +..
code-block:: llvm + + %0 = type { i32, void ()* } + @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }] + +The ``@llvm.global_ctors`` array contains a list of constructor +functions and associated priorities. The functions referenced by this +array will be called in ascending order of priority (i.e. lowest first) +when the module is loaded. The order of functions with the same priority +is not defined. + +The '``llvm.global_dtors``' Global Variable +------------------------------------------- + +.. code-block:: llvm + + %0 = type { i32, void ()* } + @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }] + +The ``@llvm.global_dtors`` array contains a list of destructor functions +and associated priorities. The functions referenced by this array will +be called in descending order of priority (i.e. highest first) when the +module is unloaded. The order of functions with the same priority is not +defined. + +Instruction Reference +===================== + +The LLVM instruction set consists of several different classifications +of instructions: :ref:`terminator instructions `, :ref:`binary +instructions `, :ref:`bitwise binary +instructions `, :ref:`memory instructions `, and +:ref:`other instructions `. + +.. _terminators: + +Terminator Instructions +----------------------- + +As mentioned :ref:`previously `, every basic block in a +program ends with a "Terminator" instruction, which indicates which +block should be executed after the current block is finished. These +terminator instructions typically yield a '``void``' value: they produce +control flow, not values (the one exception being the +':ref:`invoke `' instruction). + +The terminator instructions are: ':ref:`ret `', +':ref:`br `', ':ref:`switch `', +':ref:`indirectbr `', ':ref:`invoke `', +':ref:`resume `', and ':ref:`unreachable `'. + +..
_i_ret: + +'``ret``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + ret ; Return a value from a non-void function + ret void ; Return from void function + +Overview: +""""""""" + +The '``ret``' instruction is used to return control flow (and optionally +a value) from a function back to the caller. + +There are two forms of the '``ret``' instruction: one that returns a +value and then causes control flow, and one that just causes control +flow to occur. + +Arguments: +"""""""""" + +The '``ret``' instruction optionally accepts a single argument, the +return value. The type of the return value must be a ':ref:`first +class `' type. + +A function is not :ref:`well formed ` if it has a non-void +return type and contains a '``ret``' instruction with no return value or +a return value with a type that does not match its type, or if it has a +void return type and contains a '``ret``' instruction with a return +value. + +Semantics: +"""""""""" + +When the '``ret``' instruction is executed, control flow returns back to +the calling function's context. If the caller is a +":ref:`call `" instruction, execution continues at the +instruction after the call. If the caller was an +":ref:`invoke `" instruction, execution continues at the +beginning of the "normal" destination block. If the instruction returns +a value, that value shall set the call or invoke instruction's return +value. + +Example: +"""""""" + +.. code-block:: llvm + + ret i32 5 ; Return an integer value of 5 + ret void ; Return from a void function + ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2 + +.. _i_br: + +'``br``' Instruction +^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + br i1 , label , label + br label ; Unconditional branch + +Overview: +""""""""" + +The '``br``' instruction is used to cause control flow to transfer to a +different basic block in the current function.
There are two forms of +this instruction, corresponding to a conditional branch and an +unconditional branch. + +Arguments: +"""""""""" + +The conditional branch form of the '``br``' instruction takes a single +'``i1``' value and two '``label``' values. The unconditional form of the +'``br``' instruction takes a single '``label``' value as a target. + +Semantics: +"""""""""" + +Upon execution of a conditional '``br``' instruction, the '``i1``' +argument is evaluated. If the value is ``true``, control flows to the +'``iftrue``' ``label`` argument. If "cond" is ``false``, control flows +to the '``iffalse``' ``label`` argument. + +Example: +"""""""" + +.. code-block:: llvm + + Test: + %cond = icmp eq i32 %a, %b + br i1 %cond, label %IfEqual, label %IfUnequal + IfEqual: + ret i32 1 + IfUnequal: + ret i32 0 + +.. _i_switch: + +'``switch``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + switch , label [ , label ... ] + +Overview: +""""""""" + +The '``switch``' instruction is used to transfer control flow to one of +several different places. It is a generalization of the '``br``' +instruction, allowing a branch to occur to one of many possible +destinations. + +Arguments: +"""""""""" + +The '``switch``' instruction uses three parameters: an integer +comparison value '``value``', a default '``label``' destination, and an +array of pairs of comparison value constants and '``label``'s. The table +is not allowed to contain duplicate constant entries. + +Semantics: +"""""""""" + +The ``switch`` instruction specifies a table of values and destinations. +When the '``switch``' instruction is executed, this table is searched +for the given value. If the value is found, control flow is transferred +to the corresponding destination; otherwise, control flow is transferred +to the default destination. 
+ +Implementation: +""""""""""""""" + +Depending on properties of the target machine and the particular +``switch`` instruction, this instruction may be code generated in +different ways. For example, it could be generated as a series of +chained conditional branches or with a lookup table. + +Example: +"""""""" + +.. code-block:: llvm + + ; Emulate a conditional br instruction + %Val = zext i1 %value to i32 + switch i32 %Val, label %truedest [ i32 0, label %falsedest ] + + ; Emulate an unconditional br instruction + switch i32 0, label %dest [ ] + + ; Implement a jump table: + switch i32 %val, label %otherwise [ i32 0, label %onzero + i32 1, label %onone + i32 2, label %ontwo ] + +.. _i_indirectbr: + +'``indirectbr``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + indirectbr *
, [ label , label , ... ] + +Overview: +""""""""" + +The '``indirectbr``' instruction implements an indirect branch to a +label within the current function, whose address is specified by +"``address``". Address must be derived from a +:ref:`blockaddress ` constant. + +Arguments: +"""""""""" + +The '``address``' argument is the address of the label to jump to. The +rest of the arguments indicate the full set of possible destinations +that the address may point to. Blocks are allowed to occur multiple +times in the destination list, though this isn't particularly useful. + +This destination list is required so that dataflow analysis has an +accurate understanding of the CFG. + +Semantics: +"""""""""" + +Control transfers to the block specified in the address argument. All +possible destination blocks must be listed in the label list, otherwise +this instruction has undefined behavior. This implies that jumps to +labels defined in other functions have undefined behavior as well. + +Implementation: +""""""""""""""" + +This is typically implemented with a jump through a register. + +Example: +"""""""" + +.. code-block:: llvm + + indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ] + +.. _i_invoke: + +'``invoke``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = invoke [cconv] [ret attrs] () [fn attrs] + to label unwind label + +Overview: +""""""""" + +The '``invoke``' instruction causes control to transfer to a specified +function, with the possibility of control flow transfer to either the +'``normal``' label or the '``exception``' label. If the callee function +returns with the "``ret``" instruction, control flow will return to the +"normal" label. If the callee (or any indirect callees) returns via the +":ref:`resume `" instruction or other exception handling +mechanism, control is interrupted and continued at the dynamically +nearest "exception" label. + +The '``exception``' label is a `landing +pad `_ for the exception. 
As such, +'``exception``' label is required to have the +":ref:`landingpad `" instruction, which contains the +information about the behavior of the program after unwinding happens, +as its first non-PHI instruction. The restrictions on the +"``landingpad``" instruction tightly couple it to the "``invoke``" +instruction, so that the important information contained within the +"``landingpad``" instruction can't be lost through normal code motion. + +Arguments: +"""""""""" + +This instruction requires several arguments: + +#. The optional "cconv" marker indicates which :ref:`calling + convention ` the call should use. If none is + specified, the call defaults to using C calling conventions. +#. The optional :ref:`Parameter Attributes ` list for return + values. Only '``zeroext``', '``signext``', and '``inreg``' attributes + are valid here. +#. '``ptr to function ty``': shall be the signature of the pointer to + function value being invoked. In most cases, this is a direct + function invocation, but indirect ``invoke``'s are just as possible, + branching off an arbitrary pointer to function value. +#. '``function ptr val``': An LLVM value containing a pointer to a + function to be invoked. +#. '``function args``': argument list whose types match the function + signature argument types and parameter attributes. All arguments must + be of :ref:`first class ` type. If the function signature + indicates the function accepts a variable number of arguments, the + extra arguments can be specified. +#. '``normal label``': the label reached when the called function + executes a '``ret``' instruction. +#. '``exception label``': the label reached when a callee returns via + the :ref:`resume ` instruction or other exception handling + mechanism. +#. The optional :ref:`function attributes ` list. Only + '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``' + attributes are valid here.
+ +Semantics: +"""""""""" + +This instruction is designed to operate as a standard '``call``' +instruction in most regards. The primary difference is that it +establishes an association with a label, which is used by the runtime +library to unwind the stack. + +This instruction is used in languages with destructors to ensure that +proper cleanup is performed in the case of either a ``longjmp`` or a +thrown exception. Additionally, this is important for implementation of +'``catch``' clauses in high-level languages that support them. + +For the purposes of the SSA form, the definition of the value returned +by the '``invoke``' instruction is deemed to occur on the edge from the +current block to the "normal" label. If the callee unwinds then no +return value is available. + +Example: +"""""""" + +.. code-block:: llvm + + %retval = invoke i32 @Test(i32 15) to label %Continue + unwind label %TestCleanup ; {i32}:retval set + %retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue + unwind label %TestCleanup ; {i32}:retval set + +.. _i_resume: + +'``resume``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + resume + +Overview: +""""""""" + +The '``resume``' instruction is a terminator instruction that has no +successors. + +Arguments: +"""""""""" + +The '``resume``' instruction requires one argument, which must have the +same type as the result of any '``landingpad``' instruction in the same +function. + +Semantics: +"""""""""" + +The '``resume``' instruction resumes propagation of an existing +(in-flight) exception whose unwinding was interrupted with a +:ref:`landingpad ` instruction. + +Example: +"""""""" + +.. code-block:: llvm + + resume { i8*, i32 } %exn + +.. _i_unreachable: + +'``unreachable``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + unreachable + +Overview: +""""""""" + +The '``unreachable``' instruction has no defined semantics. 
This +instruction is used to inform the optimizer that a particular portion of +the code is not reachable. This can be used to indicate that the code +after a no-return function cannot be reached, and other facts. + +Semantics: +"""""""""" + +The '``unreachable``' instruction has no defined semantics. + +.. _binaryops: + +Binary Operations +----------------- + +Binary operators are used to do most of the computation in a program. +They require two operands of the same type, execute an operation on +them, and produce a single value. The operands might represent multiple +data, as is the case with the :ref:`vector ` data type. The +result value has the same type as its operands. + +There are several different binary operators: + +.. _i_add: + +'``add``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = add , ; yields {ty}:result + = add nuw , ; yields {ty}:result + = add nsw , ; yields {ty}:result + = add nuw nsw , ; yields {ty}:result + +Overview: +""""""""" + +The '``add``' instruction returns the sum of its two operands. + +Arguments: +"""""""""" + +The two arguments to the '``add``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the integer sum of the two operands. + +If the sum has unsigned overflow, the result returned is the +mathematical result modulo 2\ :sup:`n`\ , where n is the bit width of +the result. + +Because LLVM integers use a two's complement representation, this +instruction is appropriate for both signed and unsigned integers. + +``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap", +respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the +result value of the ``add`` is a :ref:`poison value ` if +unsigned and/or signed overflow, respectively, occurs. + +Example: +"""""""" + +.. code-block:: llvm + + = add i32 4, %var ; yields {i32}:result = 4 + %var + +.. 
_i_fadd: + +'``fadd``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = fadd [fast-math flags]* , ; yields {ty}:result + +Overview: +""""""""" + +The '``fadd``' instruction returns the sum of its two operands. + +Arguments: +"""""""""" + +The two arguments to the '``fadd``' instruction must be :ref:`floating +point ` or :ref:`vector ` of floating point values. +Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating point sum of the two operands. This +instruction can also take any number of :ref:`fast-math flags `, +which are optimization hints to enable otherwise unsafe floating point +optimizations: + +Example: +"""""""" + +.. code-block:: llvm + + = fadd float 4.0, %var ; yields {float}:result = 4.0 + %var + +'``sub``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = sub , ; yields {ty}:result + = sub nuw , ; yields {ty}:result + = sub nsw , ; yields {ty}:result + = sub nuw nsw , ; yields {ty}:result + +Overview: +""""""""" + +The '``sub``' instruction returns the difference of its two operands. + +Note that the '``sub``' instruction is used to represent the '``neg``' +instruction present in most other intermediate representations. + +Arguments: +"""""""""" + +The two arguments to the '``sub``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the integer difference of the two operands. + +If the difference has unsigned overflow, the result returned is the +mathematical result modulo 2\ :sup:`n`\ , where n is the bit width of +the result. + +Because LLVM integers use a two's complement representation, this +instruction is appropriate for both signed and unsigned integers. + +``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap", +respectively. 
If the ``nuw`` and/or ``nsw`` keywords are present, the +result value of the ``sub`` is a :ref:`poison value ` if +unsigned and/or signed overflow, respectively, occurs. + +Example: +"""""""" + +.. code-block:: llvm + + = sub i32 4, %var ; yields {i32}:result = 4 - %var + = sub i32 0, %var ; yields {i32}:result = -%var + +.. _i_fsub: + +'``fsub``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = fsub [fast-math flags]* , ; yields {ty}:result + +Overview: +""""""""" + +The '``fsub``' instruction returns the difference of its two operands. + +Note that the '``fsub``' instruction is used to represent the '``fneg``' +instruction present in most other intermediate representations. + +Arguments: +"""""""""" + +The two arguments to the '``fsub``' instruction must be :ref:`floating +point ` or :ref:`vector ` of floating point values. +Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating point difference of the two operands. +This instruction can also take any number of :ref:`fast-math +flags `, which are optimization hints to enable otherwise +unsafe floating point optimizations: + +Example: +"""""""" + +.. code-block:: llvm + + = fsub float 4.0, %var ; yields {float}:result = 4.0 - %var + = fsub float -0.0, %var ; yields {float}:result = -%var + +'``mul``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = mul , ; yields {ty}:result + = mul nuw , ; yields {ty}:result + = mul nsw , ; yields {ty}:result + = mul nuw nsw , ; yields {ty}:result + +Overview: +""""""""" + +The '``mul``' instruction returns the product of its two operands. + +Arguments: +"""""""""" + +The two arguments to the '``mul``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the integer product of the two operands.
+ +If the result of the multiplication has unsigned overflow, the result +returned is the mathematical result modulo 2\ :sup:`n`\ , where n is the +bit width of the result. + +Because LLVM integers use a two's complement representation, and the +result is the same width as the operands, this instruction returns the +correct result for both signed and unsigned integers. If a full product +(e.g. ``i32`` * ``i32`` -> ``i64``) is needed, the operands should be +sign-extended or zero-extended as appropriate to the width of the full +product. + +``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap", +respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the +result value of the ``mul`` is a :ref:`poison value ` if +unsigned and/or signed overflow, respectively, occurs. + +Example: +"""""""" + +.. code-block:: llvm + + = mul i32 4, %var ; yields {i32}:result = 4 * %var + +.. _i_fmul: + +'``fmul``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = fmul [fast-math flags]* , ; yields {ty}:result + +Overview: +""""""""" + +The '``fmul``' instruction returns the product of its two operands. + +Arguments: +"""""""""" + +The two arguments to the '``fmul``' instruction must be :ref:`floating +point ` or :ref:`vector ` of floating point values. +Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating point product of the two operands. +This instruction can also take any number of :ref:`fast-math +flags `, which are optimization hints to enable otherwise +unsafe floating point optimizations: + +Example: +"""""""" + +.. code-block:: llvm + + = fmul float 4.0, %var ; yields {float}:result = 4.0 * %var + +'``udiv``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = udiv , ; yields {ty}:result + = udiv exact , ; yields {ty}:result + +Overview: +""""""""" + +The '``udiv``' instruction returns the quotient of its two operands. 
+ +Arguments: +"""""""""" + +The two arguments to the '``udiv``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the unsigned integer quotient of the two operands. + +Note that unsigned integer division and signed integer division are +distinct operations; for signed integer division, use '``sdiv``'. + +Division by zero leads to undefined behavior. + +If the ``exact`` keyword is present, the result value of the ``udiv`` is +a :ref:`poison value ` if %op1 is not a multiple of %op2 (as +such, "((a udiv exact b) mul b) == a"). + +Example: +"""""""" + +.. code-block:: llvm + + = udiv i32 4, %var ; yields {i32}:result = 4 / %var + +'``sdiv``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = sdiv , ; yields {ty}:result + = sdiv exact , ; yields {ty}:result + +Overview: +""""""""" + +The '``sdiv``' instruction returns the quotient of its two operands. + +Arguments: +"""""""""" + +The two arguments to the '``sdiv``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the signed integer quotient of the two operands +rounded towards zero. + +Note that signed integer division and unsigned integer division are +distinct operations; for unsigned integer division, use '``udiv``'. + +Division by zero leads to undefined behavior. Overflow also leads to +undefined behavior; this is a rare case, but can occur, for example, by +doing a 32-bit division of -2147483648 by -1. + +If the ``exact`` keyword is present, the result value of the ``sdiv`` is +a :ref:`poison value ` if the result would be rounded. + +Example: +"""""""" + +.. code-block:: llvm + + = sdiv i32 4, %var ; yields {i32}:result = 4 / %var + +.. 
_i_fdiv: + +'``fdiv``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = fdiv [fast-math flags]* , ; yields {ty}:result + +Overview: +""""""""" + +The '``fdiv``' instruction returns the quotient of its two operands. + +Arguments: +"""""""""" + +The two arguments to the '``fdiv``' instruction must be :ref:`floating +point ` or :ref:`vector ` of floating point values. +Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating point quotient of the two operands. +This instruction can also take any number of :ref:`fast-math +flags `, which are optimization hints to enable otherwise +unsafe floating point optimizations: + +Example: +"""""""" + +.. code-block:: llvm + + = fdiv float 4.0, %var ; yields {float}:result = 4.0 / %var + +'``urem``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = urem , ; yields {ty}:result + +Overview: +""""""""" + +The '``urem``' instruction returns the remainder from the unsigned +division of its two arguments. + +Arguments: +"""""""""" + +The two arguments to the '``urem``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +This instruction returns the unsigned integer *remainder* of a division. +This instruction always performs an unsigned division to get the +remainder. + +Note that unsigned integer remainder and signed integer remainder are +distinct operations; for signed integer remainder, use '``srem``'. + +Taking the remainder of a division by zero leads to undefined behavior. + +Example: +"""""""" + +.. code-block:: llvm + + = urem i32 4, %var ; yields {i32}:result = 4 % %var + +'``srem``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = srem , ; yields {ty}:result + +Overview: +""""""""" + +The '``srem``' instruction returns the remainder from the signed +division of its two operands. 
This instruction can also take +:ref:`vector ` versions of the values in which case the elements +must be integers. + +Arguments: +"""""""""" + +The two arguments to the '``srem``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +This instruction returns the *remainder* of a division (where the result +is either zero or has the same sign as the dividend, ``op1``), not the +*modulo* operator (where the result is either zero or has the same sign +as the divisor, ``op2``) of a value. For more information about the +difference, see `The Math +Forum `_. For a +table of how this is implemented in various languages, please see +`Wikipedia: modulo +operation `_. + +Note that signed integer remainder and unsigned integer remainder are +distinct operations; for unsigned integer remainder, use '``urem``'. + +Taking the remainder of a division by zero leads to undefined behavior. +Overflow also leads to undefined behavior; this is a rare case, but can +occur, for example, by taking the remainder of a 32-bit division of +-2147483648 by -1. (The remainder doesn't actually overflow, but this +rule lets srem be implemented using instructions that return both the +result of the division and the remainder.) + +Example: +"""""""" + +.. code-block:: llvm + + = srem i32 4, %var ; yields {i32}:result = 4 % %var + +.. _i_frem: + +'``frem``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = frem [fast-math flags]* , ; yields {ty}:result + +Overview: +""""""""" + +The '``frem``' instruction returns the remainder from the division of +its two operands. + +Arguments: +"""""""""" + +The two arguments to the '``frem``' instruction must be :ref:`floating +point ` or :ref:`vector ` of floating point values. +Both arguments must have identical types. + +Semantics: +"""""""""" + +This instruction returns the *remainder* of a division. The remainder +has the same sign as the dividend. 
This instruction can also take any +number of :ref:`fast-math flags `, which are optimization hints +to enable otherwise unsafe floating point optimizations: + +Example: +"""""""" + +.. code-block:: llvm + + = frem float 4.0, %var ; yields {float}:result = 4.0 % %var + +.. _bitwiseops: + +Bitwise Binary Operations +------------------------- + +Bitwise binary operators are used to do various forms of bit-twiddling +in a program. They are generally very efficient instructions and can +commonly be strength reduced from other instructions. They require two +operands of the same type, execute an operation on them, and produce a +single value. The resulting value is the same type as its operands. + +'``shl``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = shl , ; yields {ty}:result + = shl nuw , ; yields {ty}:result + = shl nsw , ; yields {ty}:result + = shl nuw nsw , ; yields {ty}:result + +Overview: +""""""""" + +The '``shl``' instruction returns the first operand shifted to the left +a specified number of bits. + +Arguments: +"""""""""" + +Both arguments to the '``shl``' instruction must be the same +:ref:`integer ` or :ref:`vector ` of integer type. +'``op2``' is treated as an unsigned value. + +Semantics: +"""""""""" + +The value produced is ``op1`` \* 2\ :sup:`op2` mod 2\ :sup:`n`, +where ``n`` is the width of the result. If ``op2`` is (statically or +dynamically) negative or equal to or larger than the number of bits in +``op1``, the result is undefined. If the arguments are vectors, each +vector element of ``op1`` is shifted by the corresponding shift amount +in ``op2``. + +If the ``nuw`` keyword is present, then the shift produces a :ref:`poison +value ` if it shifts out any non-zero bits. If the +``nsw`` keyword is present, then the shift produces a :ref:`poison +value ` if it shifts out any bits that disagree with the +resultant sign bit. 
As such, NUW/NSW have the same semantics as they +would if the shift were expressed as a mul instruction with the same +nsw/nuw bits in (mul %op1, (shl 1, %op2)). + +Example: +"""""""" + +.. code-block:: llvm + + = shl i32 4, %var ; yields {i32}: 4 << %var + = shl i32 4, 2 ; yields {i32}: 16 + = shl i32 1, 10 ; yields {i32}: 1024 + = shl i32 1, 32 ; undefined + = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 2, i32 4> + +'``lshr``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = lshr , ; yields {ty}:result + = lshr exact , ; yields {ty}:result + +Overview: +""""""""" + +The '``lshr``' instruction (logical shift right) returns the first +operand shifted to the right a specified number of bits with zero fill. + +Arguments: +"""""""""" + +Both arguments to the '``lshr``' instruction must be the same +:ref:`integer ` or :ref:`vector ` of integer type. +'``op2``' is treated as an unsigned value. + +Semantics: +"""""""""" + +This instruction always performs a logical shift right operation. The +most significant bits of the result will be filled with zero bits after +the shift. If ``op2`` is (statically or dynamically) equal to or larger +than the number of bits in ``op1``, the result is undefined. If the +arguments are vectors, each vector element of ``op1`` is shifted by the +corresponding shift amount in ``op2``. + +If the ``exact`` keyword is present, the result value of the ``lshr`` is +a :ref:`poison value ` if any of the bits shifted out are +non-zero. + +Example: +"""""""" + +.. 
code-block:: llvm + + = lshr i32 4, 1 ; yields {i32}:result = 2 + = lshr i32 4, 2 ; yields {i32}:result = 1 + = lshr i8 4, 3 ; yields {i8}:result = 0 + = lshr i8 -2, 1 ; yields {i8}:result = 0x7F + = lshr i32 1, 32 ; undefined + = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1> + +'``ashr``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = ashr , ; yields {ty}:result + = ashr exact , ; yields {ty}:result + +Overview: +""""""""" + +The '``ashr``' instruction (arithmetic shift right) returns the first +operand shifted to the right a specified number of bits with sign +extension. + +Arguments: +"""""""""" + +Both arguments to the '``ashr``' instruction must be the same +:ref:`integer ` or :ref:`vector ` of integer type. +'``op2``' is treated as an unsigned value. + +Semantics: +"""""""""" + +This instruction always performs an arithmetic shift right operation. +The most significant bits of the result will be filled with the sign bit +of ``op1``. If ``op2`` is (statically or dynamically) equal to or larger +than the number of bits in ``op1``, the result is undefined. If the +arguments are vectors, each vector element of ``op1`` is shifted by the +corresponding shift amount in ``op2``. + +If the ``exact`` keyword is present, the result value of the ``ashr`` is +a :ref:`poison value ` if any of the bits shifted out are +non-zero. + +Example: +"""""""" + +.. code-block:: llvm + + = ashr i32 4, 1 ; yields {i32}:result = 2 + = ashr i32 4, 2 ; yields {i32}:result = 1 + = ashr i8 4, 3 ; yields {i8}:result = 0 + = ashr i8 -2, 1 ; yields {i8}:result = -1 + = ashr i32 1, 32 ; undefined + = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3> ; yields: result=<2 x i32> < i32 -1, i32 0> + +'``and``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = and , ; yields {ty}:result + +Overview: +""""""""" + +The '``and``' instruction returns the bitwise logical and of its two +operands.
+ +Arguments: +"""""""""" + +The two arguments to the '``and``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +The truth table used for the '``and``' instruction is: + ++-----+-----+-----+ +| In0 | In1 | Out | ++-----+-----+-----+ +| 0 | 0 | 0 | ++-----+-----+-----+ +| 0 | 1 | 0 | ++-----+-----+-----+ +| 1 | 0 | 0 | ++-----+-----+-----+ +| 1 | 1 | 1 | ++-----+-----+-----+ + +Example: +"""""""" + +.. code-block:: llvm + + = and i32 4, %var ; yields {i32}:result = 4 & %var + = and i32 15, 40 ; yields {i32}:result = 8 + = and i32 4, 8 ; yields {i32}:result = 0 + +'``or``' Instruction +^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = or , ; yields {ty}:result + +Overview: +""""""""" + +The '``or``' instruction returns the bitwise logical inclusive or of its +two operands. + +Arguments: +"""""""""" + +The two arguments to the '``or``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. Both +arguments must have identical types. + +Semantics: +"""""""""" + +The truth table used for the '``or``' instruction is: + ++-----+-----+-----+ +| In0 | In1 | Out | ++-----+-----+-----+ +| 0 | 0 | 0 | ++-----+-----+-----+ +| 0 | 1 | 1 | ++-----+-----+-----+ +| 1 | 0 | 1 | ++-----+-----+-----+ +| 1 | 1 | 1 | ++-----+-----+-----+ + +Example: +"""""""" + +:: + + = or i32 4, %var ; yields {i32}:result = 4 | %var + = or i32 15, 40 ; yields {i32}:result = 47 + = or i32 4, 8 ; yields {i32}:result = 12 + +'``xor``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = xor , ; yields {ty}:result + +Overview: +""""""""" + +The '``xor``' instruction returns the bitwise logical exclusive or of +its two operands. The ``xor`` is used to implement the "one's +complement" operation, which is the "~" operator in C. + +Arguments: +"""""""""" + +The two arguments to the '``xor``' instruction must be +:ref:`integer ` or :ref:`vector ` of integer values. 
Both +arguments must have identical types. + +Semantics: +"""""""""" + +The truth table used for the '``xor``' instruction is: + ++-----+-----+-----+ +| In0 | In1 | Out | ++-----+-----+-----+ +| 0 | 0 | 0 | ++-----+-----+-----+ +| 0 | 1 | 1 | ++-----+-----+-----+ +| 1 | 0 | 1 | ++-----+-----+-----+ +| 1 | 1 | 0 | ++-----+-----+-----+ + +Example: +"""""""" + +.. code-block:: llvm + + = xor i32 4, %var ; yields {i32}:result = 4 ^ %var + = xor i32 15, 40 ; yields {i32}:result = 39 + = xor i32 4, 8 ; yields {i32}:result = 12 + = xor i32 %V, -1 ; yields {i32}:result = ~%V + +Vector Operations +----------------- + +LLVM supports several instructions to represent vector operations in a +target-independent manner. These instructions cover the element-access +and vector-specific operations needed to process vectors effectively. +While LLVM does directly support these vector operations, many +sophisticated algorithms will want to use target-specific intrinsics to +take full advantage of a specific target. + +.. _i_extractelement: + +'``extractelement``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = extractelement > , i32 ; yields + +Overview: +""""""""" + +The '``extractelement``' instruction extracts a single scalar element +from a vector at a specified index. + +Arguments: +"""""""""" + +The first operand of an '``extractelement``' instruction is a value of +:ref:`vector ` type. The second operand is an index indicating +the position from which to extract the element. The index may be a +variable. + +Semantics: +"""""""""" + +The result is a scalar of the same type as the element type of ``val``. +Its value is the value at position ``idx`` of ``val``. If ``idx`` +exceeds the length of ``val``, the results are undefined. + +Example: +"""""""" + +.. code-block:: llvm + + = extractelement <4 x i32> %vec, i32 0 ; yields i32 + +.. 
_i_insertelement: + +'``insertelement``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = insertelement > , , i32 ; yields > + +Overview: +""""""""" + +The '``insertelement``' instruction inserts a scalar element into a +vector at a specified index. + +Arguments: +"""""""""" + +The first operand of an '``insertelement``' instruction is a value of +:ref:`vector ` type. The second operand is a scalar value whose +type must equal the element type of the first operand. The third operand +is an index indicating the position at which to insert the value. The +index may be a variable. + +Semantics: +"""""""""" + +The result is a vector of the same type as ``val``. Its element values +are those of ``val`` except at position ``idx``, where it gets the value +``elt``. If ``idx`` exceeds the length of ``val``, the results are +undefined. + +Example: +"""""""" + +.. code-block:: llvm + + = insertelement <4 x i32> %vec, i32 1, i32 0 ; yields <4 x i32> + +.. _i_shufflevector: + +'``shufflevector``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = shufflevector > , > , ; yields > + +Overview: +""""""""" + +The '``shufflevector``' instruction constructs a permutation of elements +from two input vectors, returning a vector with the same element type as +the input and length that is the same as the shuffle mask. + +Arguments: +"""""""""" + +The first two operands of a '``shufflevector``' instruction are vectors +with the same type. The third argument is a shuffle mask whose element +type is always 'i32'. The result of the instruction is a vector whose +length is the same as the shuffle mask and whose element type is the +same as the element type of the first two operands. + +The shuffle mask operand is required to be a constant vector with either +constant integer or undef values. + +Semantics: +"""""""""" + +The elements of the two input vectors are numbered from left to right +across both of the vectors. 
The shuffle mask operand specifies, for each +element of the result vector, which element of the two input vectors the +result element gets. The element selector may be undef (meaning "don't +care") and the second operand may be undef if performing a shuffle from +only one vector. + +Example: +"""""""" + +.. code-block:: llvm + + = shufflevector <4 x i32> %v1, <4 x i32> %v2, + <4 x i32> ; yields <4 x i32> + = shufflevector <4 x i32> %v1, <4 x i32> undef, + <4 x i32> ; yields <4 x i32> - Identity shuffle. + = shufflevector <8 x i32> %v1, <8 x i32> undef, + <4 x i32> ; yields <4 x i32> + = shufflevector <4 x i32> %v1, <4 x i32> %v2, + <8 x i32> ; yields <8 x i32> + +Aggregate Operations +-------------------- + +LLVM supports several instructions for working with +:ref:`aggregate ` values. + +.. _i_extractvalue: + +'``extractvalue``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = extractvalue , {, }* + +Overview: +""""""""" + +The '``extractvalue``' instruction extracts the value of a member field +from an :ref:`aggregate ` value. + +Arguments: +"""""""""" + +The first operand of an '``extractvalue``' instruction is a value of +:ref:`struct ` or :ref:`array ` type. The operands are +constant indices to specify which value to extract in a similar manner +as indices in a '``getelementptr``' instruction. + +The major differences to ``getelementptr`` indexing are: + +- Since the value being indexed is not a pointer, the first index is + omitted and assumed to be zero. +- At least one index must be specified. +- Not only struct indices but also array indices must be in bounds. + +Semantics: +"""""""""" + +The result is the value at the position in the aggregate specified by +the index operands. + +Example: +"""""""" + +.. code-block:: llvm + + = extractvalue {i32, float} %agg, 0 ; yields i32 + +.. 
_i_insertvalue: + +'``insertvalue``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = insertvalue , , {, }* ; yields + +Overview: +""""""""" + +The '``insertvalue``' instruction inserts a value into a member field in +an :ref:`aggregate ` value. + +Arguments: +"""""""""" + +The first operand of an '``insertvalue``' instruction is a value of +:ref:`struct ` or :ref:`array ` type. The second operand is +a first-class value to insert. The following operands are constant +indices indicating the position at which to insert the value in a +similar manner as indices in a '``extractvalue``' instruction. The value +to insert must have the same type as the value identified by the +indices. + +Semantics: +"""""""""" + +The result is an aggregate of the same type as ``val``. Its value is +that of ``val`` except that the value at the position specified by the +indices is that of ``elt``. + +Example: +"""""""" + +.. code-block:: llvm + + %agg1 = insertvalue {i32, float} undef, i32 1, 0 ; yields {i32 1, float undef} + %agg2 = insertvalue {i32, float} %agg1, float %val, 1 ; yields {i32 1, float %val} + %agg3 = insertvalue {i32, {float}} %agg1, float %val, 1, 0 ; yields {i32 1, float %val} + +.. _memoryops: + +Memory Access and Addressing Operations +--------------------------------------- + +A key design point of an SSA-based representation is how it represents +memory. In LLVM, no memory locations are in SSA form, which makes things +very simple. This section describes how to read, write, and allocate +memory in LLVM. + +.. _i_alloca: + +'``alloca``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = alloca [, ][, align ] ; yields {type*}:result + +Overview: +""""""""" + +The '``alloca``' instruction allocates memory on the stack frame of the +currently executing function, to be automatically released when this +function returns to its caller. The object is always allocated in the +generic address space (address space zero). 
+ +Arguments: +"""""""""" + +The '``alloca``' instruction allocates ``sizeof()*NumElements`` +bytes of memory on the runtime stack, returning a pointer of the +appropriate type to the program. If "NumElements" is specified, it is +the number of elements allocated, otherwise "NumElements" is defaulted +to be one. If a constant alignment is specified, the value result of the +allocation is guaranteed to be aligned to at least that boundary. If not +specified, or if zero, the target can choose to align the allocation on +any convenient boundary compatible with the type. + +'``type``' may be any sized type. + +Semantics: +"""""""""" + +Memory is allocated; a pointer is returned. The operation is undefined +if there is insufficient stack space for the allocation. '``alloca``'d +memory is automatically released when the function returns. The +'``alloca``' instruction is commonly used to represent automatic +variables that must have an address available. When the function returns +(either with the ``ret`` or ``resume`` instructions), the memory is +reclaimed. Allocating zero bytes is legal, but the result is undefined. +The order in which memory is allocated (i.e., which way the stack grows) +is not specified. + +Example: +"""""""" + +.. code-block:: llvm + + %ptr = alloca i32 ; yields {i32*}:ptr + %ptr = alloca i32, i32 4 ; yields {i32*}:ptr + %ptr = alloca i32, i32 4, align 1024 ; yields {i32*}:ptr + %ptr = alloca i32, align 1024 ; yields {i32*}:ptr + +.. _i_load: + +'``load``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = load [volatile] * [, align ][, !nontemporal !][, !invariant.load !] + = load atomic [volatile] * [singlethread] , align + ! = !{ i32 1 } + +Overview: +""""""""" + +The '``load``' instruction is used to read from memory. + +Arguments: +"""""""""" + +The argument to the '``load``' instruction specifies the memory address +from which to load. The pointer must point to a :ref:`first +class ` type.
If the ``load`` is marked as ``volatile``, +then the optimizer is not allowed to modify the number or order of +execution of this ``load`` with other :ref:`volatile +operations `. + +If the ``load`` is marked as ``atomic``, it takes an extra +:ref:`ordering ` and optional ``singlethread`` argument. The +``release`` and ``acq_rel`` orderings are not valid on ``load`` +instructions. Atomic loads produce :ref:`defined ` results +when they may see multiple atomic stores. The type of the pointee must +be an integer type whose bit width is a power of two greater than or +equal to eight and less than or equal to a target-specific size limit. +``align`` must be explicitly specified on atomic loads, and the load has +undefined behavior if the alignment is not set to a value which is at +least the size in bytes of the pointee. ``!nontemporal`` does not have +any defined semantics for atomic loads. + +The optional constant ``align`` argument specifies the alignment of the +operation (that is, the alignment of the memory address). A value of 0 +or an omitted ``align`` argument means that the operation has the abi +alignment for the target. It is the responsibility of the code emitter +to ensure that the alignment information is correct. Overestimating the +alignment results in undefined behavior. Underestimating the alignment +may produce less efficient code. An alignment of 1 is always safe. + +The optional ``!nontemporal`` metadata must reference a single +metadata name corresponding to a metadata node with one +``i32`` entry of value 1. The existence of the ``!nontemporal`` +metadata on the instruction tells the optimizer and code generator +that this load is not expected to be reused in the cache. The code +generator may select special instructions to save cache bandwidth, such +as the ``MOVNT`` instruction on x86. + +The optional ``!invariant.load`` metadata must reference a single +metadata name corresponding to a metadata node with no +entries.
The existence of the ``!invariant.load`` metadata on the +instruction tells the optimizer and code generator that this load +address points to memory which does not change value during program +execution. The optimizer may then move this load around, for example, by +hoisting it out of loops using loop invariant code motion. + +Semantics: +"""""""""" + +The location of memory pointed to is loaded. If the value being loaded +is of scalar type then the number of bytes read does not exceed the +minimum number of bytes needed to hold all bits of the type. For +example, loading an ``i24`` reads at most three bytes. When loading a +value of a type like ``i20`` with a size that is not an integral number +of bytes, the result is undefined if the value was not originally +written using a store of the same type. + +Examples: +""""""""" + +.. code-block:: llvm + + %ptr = alloca i32 ; yields {i32*}:ptr + store i32 3, i32* %ptr ; yields {void} + %val = load i32* %ptr ; yields {i32}:val = i32 3 + +.. _i_store: + +'``store``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + store [volatile] , * [, align ][, !nontemporal !] ; yields {void} + store atomic [volatile] , * [singlethread] , align ; yields {void} + +Overview: +""""""""" + +The '``store``' instruction is used to write to memory. + +Arguments: +"""""""""" + +There are two arguments to the '``store``' instruction: a value to store +and an address at which to store it. The type of the '````' +operand must be a pointer to the :ref:`first class ` type of +the '````' operand. If the ``store`` is marked as ``volatile``, +then the optimizer is not allowed to modify the number or order of +execution of this ``store`` with other :ref:`volatile +operations `. + +If the ``store`` is marked as ``atomic``, it takes an extra +:ref:`ordering ` and optional ``singlethread`` argument. The +``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` +instructions.
Atomic loads produce :ref:`defined ` results +when they may see multiple atomic stores. The type of the pointee must +be an integer type whose bit width is a power of two greater than or +equal to eight and less than or equal to a target-specific size limit. +``align`` must be explicitly specified on atomic stores, and the store +has undefined behavior if the alignment is not set to a value which is +at least the size in bytes of the pointee. ``!nontemporal`` does not +have any defined semantics for atomic stores. + +The optional constant "align" argument specifies the alignment of the +operation (that is, the alignment of the memory address). A value of 0 +or an omitted "align" argument means that the operation has the abi +alignment for the target. It is the responsibility of the code emitter +to ensure that the alignment information is correct. Overestimating the +alignment results in undefined behavior. Underestimating the +alignment may produce less efficient code. An alignment of 1 is always +safe. + +The optional !nontemporal metadata must reference a single metadata +name corresponding to a metadata node with one i32 entry of +value 1. The existence of the !nontemporal metadata on the instruction +tells the optimizer and code generator that this store is not expected to +be reused in the cache. The code generator may select special +instructions to save cache bandwidth, such as the MOVNT instruction on +x86. + +Semantics: +"""""""""" + +The contents of memory are updated to contain '````' at the +location specified by the '````' operand. If '````' is +of scalar type then the number of bytes written does not exceed the +minimum number of bytes needed to hold all bits of the type. For +example, storing an ``i24`` writes at most three bytes.
When writing a +value of a type like ``i20`` with a size that is not an integral number +of bytes, it is unspecified what happens to the extra bits that do not +belong to the type, but they will typically be overwritten. + +Example: +"""""""" + +.. code-block:: llvm + + %ptr = alloca i32 ; yields {i32*}:ptr + store i32 3, i32* %ptr ; yields {void} + %val = load i32* %ptr ; yields {i32}:val = i32 3 + +.. _i_fence: + +'``fence``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + fence [singlethread] ; yields {void} + +Overview: +""""""""" + +The '``fence``' instruction is used to introduce happens-before edges +between operations. + +Arguments: +"""""""""" + +'``fence``' instructions take an :ref:`ordering ` argument which +defines what *synchronizes-with* edges they add. They can only be given +``acquire``, ``release``, ``acq_rel``, and ``seq_cst`` orderings. + +Semantics: +"""""""""" + +A fence A which has (at least) ``release`` ordering semantics +*synchronizes with* a fence B with (at least) ``acquire`` ordering +semantics if and only if there exist atomic operations X and Y, both +operating on some atomic object M, such that A is sequenced before X, X +modifies M (either directly or through some side effect of a sequence +headed by X), Y is sequenced before B, and Y observes M. This provides a +*happens-before* dependency between A and B. Rather than an explicit +``fence``, one (but not both) of the atomic operations X or Y might +provide a ``release`` or ``acquire`` (resp.) ordering constraint and +still *synchronize-with* the explicit ``fence`` and establish the +*happens-before* edge. + +A ``fence`` which has ``seq_cst`` ordering, in addition to having both +``acquire`` and ``release`` semantics specified above, participates in +the global program order of other ``seq_cst`` operations and/or fences. + +The optional ":ref:`singlethread `" argument specifies +that the fence only synchronizes with other fences in the same thread. 
+(This is useful for interacting with signal handlers.) + +Example: +"""""""" + +.. code-block:: llvm + + fence acquire ; yields {void} + fence singlethread seq_cst ; yields {void} + +.. _i_cmpxchg: + +'``cmpxchg``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + cmpxchg [volatile] * , , [singlethread] ; yields {ty} + +Overview: +""""""""" + +The '``cmpxchg``' instruction is used to atomically modify memory. It +loads a value in memory and compares it to a given value. If they are +equal, it stores a new value into the memory. + +Arguments: +"""""""""" + +There are three arguments to the '``cmpxchg``' instruction: an address +to operate on, a value to compare to the value currently at that +address, and a new value to place at that address if the compared values +are equal. The type of '' must be an integer type whose bit width +is a power of two greater than or equal to eight and less than or equal +to a target-specific size limit. '' and '' must have the same +type, and the type of '' must be a pointer to that type. If the +``cmpxchg`` is marked as ``volatile``, then the optimizer is not allowed +to modify the number or order of execution of this ``cmpxchg`` with +other :ref:`volatile operations `. + +The :ref:`ordering ` argument specifies how this ``cmpxchg`` +synchronizes with other atomic operations. + +The optional "``singlethread``" argument declares that the ``cmpxchg`` +is only atomic with respect to code (usually signal handlers) running in +the same thread as the ``cmpxchg``. Otherwise the cmpxchg is atomic with +respect to all other code in the system. + +The pointer passed into cmpxchg must have alignment greater than or +equal to the size in memory of the operand. + +Semantics: +"""""""""" + +The contents of memory at the location specified by the '````' +operand is read and compared to '````'; if the read value is +equal, '````' is written. The original value at the location is +returned.
+ +A successful ``cmpxchg`` is a read-modify-write instruction for the purpose +of identifying release sequences. A failed ``cmpxchg`` is equivalent to an +atomic load with an ordering parameter determined by dropping any +``release`` part of the ``cmpxchg``'s ordering. + +Example: +"""""""" + +.. code-block:: llvm + + entry: + %orig = atomic load i32* %ptr unordered ; yields {i32} + br label %loop + + loop: + %cmp = phi i32 [ %orig, %entry ], [%old, %loop] + %squared = mul i32 %cmp, %cmp + %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared ; yields {i32} + %success = icmp eq i32 %cmp, %old + br i1 %success, label %done, label %loop + + done: + ... + +.. _i_atomicrmw: + +'``atomicrmw``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + atomicrmw [volatile] * , [singlethread] ; yields {ty} + +Overview: +""""""""" + +The '``atomicrmw``' instruction is used to atomically modify memory. + +Arguments: +"""""""""" + +There are three arguments to the '``atomicrmw``' instruction: an +operation to apply, an address whose value to modify, an argument to the +operation. The operation must be one of the following keywords: + +- xchg +- add +- sub +- and +- nand +- or +- xor +- max +- min +- umax +- umin + +The type of '' must be an integer type whose bit width is a power +of two greater than or equal to eight and less than or equal to a +target-specific size limit. The type of the '````' operand must +be a pointer to that type. If the ``atomicrmw`` is marked as +``volatile``, then the optimizer is not allowed to modify the number or +order of execution of this ``atomicrmw`` with other :ref:`volatile +operations `. + +Semantics: +"""""""""" + +The contents of memory at the location specified by the '````' +operand are atomically read, modified, and written back. The original +value at the location is returned. 
The modification is specified by the +operation argument: + +- xchg: ``*ptr = val`` +- add: ``*ptr = *ptr + val`` +- sub: ``*ptr = *ptr - val`` +- and: ``*ptr = *ptr & val`` +- nand: ``*ptr = ~(*ptr & val)`` +- or: ``*ptr = *ptr | val`` +- xor: ``*ptr = *ptr ^ val`` +- max: ``*ptr = *ptr > val ? *ptr : val`` (using a signed comparison) +- min: ``*ptr = *ptr < val ? *ptr : val`` (using a signed comparison) +- umax: ``*ptr = *ptr > val ? *ptr : val`` (using an unsigned + comparison) +- umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned + comparison) + +Example: +"""""""" + +.. code-block:: llvm + + %old = atomicrmw add i32* %ptr, i32 1 acquire ; yields {i32} + +.. _i_getelementptr: + +'``getelementptr``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = getelementptr * {, }* + = getelementptr inbounds * {, }* + = getelementptr ptrval, idx + +Overview: +""""""""" + +The '``getelementptr``' instruction is used to get the address of a +subelement of an :ref:`aggregate ` data structure. It performs +address calculation only and does not access memory. + +Arguments: +"""""""""" + +The first argument is always a pointer or a vector of pointers, and +forms the basis of the calculation. The remaining arguments are indices +that indicate which of the elements of the aggregate object are indexed. +The interpretation of each index is dependent on the type being indexed +into. The first index always indexes the pointer value given as the +first argument, the second index indexes a value of the type pointed to +(not necessarily the value directly pointed to, since the first index +can be non-zero), etc. The first type indexed into must be a pointer +value, subsequent types can be arrays, vectors, and structs. Note that +subsequent types being indexed into can never be pointers, since that +would require loading the pointer before continuing calculation. + +The type of each index argument depends on the type it is indexing into. 
+When indexing into a (optionally packed) structure, only ``i32`` integer +**constants** are allowed (when using a vector of indices they must all +be the **same** ``i32`` integer constant). When indexing into an array, +pointer or vector, integers of any width are allowed, and they are not +required to be constant. These integers are treated as signed values +where relevant. + +For example, let's consider a C code fragment and how it gets compiled +to LLVM: + +.. code-block:: c + + struct RT { + char A; + int B[10][20]; + char C; + }; + struct ST { + int X; + double Y; + struct RT Z; + }; + + int *foo(struct ST *s) { + return &s[1].Z.B[5][13]; + } + +The LLVM code generated by Clang is: + +.. code-block:: llvm + + %struct.RT = type { i8, [10 x [20 x i32]], i8 } + %struct.ST = type { i32, double, %struct.RT } + + define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp { + entry: + %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13 + ret i32* %arrayidx + } + +Semantics: +"""""""""" + +In the example above, the first index is indexing into the +'``%struct.ST*``' type, which is a pointer, yielding a '``%struct.ST``' += '``{ i32, double, %struct.RT }``' type, a structure. The second index +indexes into the third element of the structure, yielding a +'``%struct.RT``' = '``{ i8 , [10 x [20 x i32]], i8 }``' type, another +structure. The third index indexes into the second element of the +structure, yielding a '``[10 x [20 x i32]]``' type, an array. The two +dimensions of the array are subscripted into, yielding an '``i32``' +type. The '``getelementptr``' instruction returns a pointer to this +element, thus computing a value of '``i32*``' type. + +Note that it is perfectly legal to index partially through a structure, +returning a pointer to an inner element. Because of this, the LLVM code +for the given testcase is equivalent to: + +.. 
code-block:: llvm + + define i32* @foo(%struct.ST* %s) { + %t1 = getelementptr %struct.ST* %s, i32 1 ; yields %struct.ST*:%t1 + %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2 ; yields %struct.RT*:%t2 + %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1 ; yields [10 x [20 x i32]]*:%t3 + %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 ; yields [20 x i32]*:%t4 + %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 ; yields i32*:%t5 + ret i32* %t5 + } + +If the ``inbounds`` keyword is present, the result value of the +``getelementptr`` is a :ref:`poison value ` if the base +pointer is not an *in bounds* address of an allocated object, or if any +of the addresses that would be formed by successive addition of the +offsets implied by the indices to the base address with infinitely +precise signed arithmetic are not an *in bounds* address of that +allocated object. The *in bounds* addresses for an allocated object are +all the addresses that point into the object, plus the address one byte +past the end. In cases where the base is a vector of pointers the +``inbounds`` keyword applies to each of the computations element-wise. + +If the ``inbounds`` keyword is not present, the offsets are added to the +base address with silently-wrapping two's complement arithmetic. If the +offsets have a different width from the pointer, they are sign-extended +or truncated to the width of the pointer. The result value of the +``getelementptr`` may be outside the object pointed to by the base +pointer. The result value may not necessarily be used to access memory +though, even if it happens to point into allocated storage. See the +:ref:`Pointer Aliasing Rules ` section for more +information. + +The getelementptr instruction is often confusing. For some more insight +into how it works, see :doc:`the getelementptr FAQ `. + +Example: +"""""""" + +.. 
code-block:: llvm + + ; yields [12 x i8]*:aptr + %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1 + ; yields i8*:vptr + %vptr = getelementptr {i32, <2 x i8>}* %svptr, i64 0, i32 1, i32 1 + ; yields i8*:eptr + %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1 + ; yields i32*:iptr + %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0 + +In cases where the pointer argument is a vector of pointers, each index +must be a vector with the same number of elements. For example: + +.. code-block:: llvm + + %A = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets, + +Conversion Operations +--------------------- + +The instructions in this category are the conversion instructions +(casting) which all take a single operand and a type. They perform +various bit conversions on the operand. + +'``trunc .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = trunc to ; yields ty2 + +Overview: +""""""""" + +The '``trunc``' instruction truncates its operand to the type ``ty2``. + +Arguments: +"""""""""" + +The '``trunc``' instruction takes a value to trunc, and a type to trunc +it to. Both types must be of :ref:`integer ` types, or vectors +of the same number of integers. The bit size of the ``value`` must be +larger than the bit size of the destination type, ``ty2``. Equal sized +types are not allowed. + +Semantics: +"""""""""" + +The '``trunc``' instruction truncates the high order bits in ``value`` +and converts the remaining bits to ``ty2``. Since the source size must +be larger than the destination size, ``trunc`` cannot be a *no-op cast*. +It will always truncate bits. + +Example: +"""""""" + +.. code-block:: llvm + + %X = trunc i32 257 to i8 ; yields i8:1 + %Y = trunc i32 123 to i1 ; yields i1:true + %Z = trunc i32 122 to i1 ; yields i1:false + %W = trunc <2 x i16> to <2 x i8> ; yields + +'``zext .. 
to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = zext to ; yields ty2 + +Overview: +""""""""" + +The '``zext``' instruction zero extends its operand to type ``ty2``. + +Arguments: +"""""""""" + +The '``zext``' instruction takes a value to cast, and a type to cast it +to. Both types must be of :ref:`integer ` types, or vectors of +the same number of integers. The bit size of the ``value`` must be +smaller than the bit size of the destination type, ``ty2``. + +Semantics: +"""""""""" + +The ``zext`` fills the high order bits of the ``value`` with zero bits +until it reaches the size of the destination type, ``ty2``. + +When zero extending from i1, the result will always be either 0 or 1. + +Example: +"""""""" + +.. code-block:: llvm + + %X = zext i32 257 to i64 ; yields i64:257 + %Y = zext i1 true to i32 ; yields i32:1 + %Z = zext <2 x i16> to <2 x i32> ; yields + +'``sext .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = sext to ; yields ty2 + +Overview: +""""""""" + +The '``sext``' sign extends ``value`` to the type ``ty2``. + +Arguments: +"""""""""" + +The '``sext``' instruction takes a value to cast, and a type to cast it +to. Both types must be of :ref:`integer ` types, or vectors of +the same number of integers. The bit size of the ``value`` must be +smaller than the bit size of the destination type, ``ty2``. + +Semantics: +"""""""""" + +The '``sext``' instruction performs a sign extension by copying the sign +bit (highest order bit) of the ``value`` until it reaches the bit size +of the type ``ty2``. + +When sign extending from i1, the extension always results in -1 or 0. + +Example: +"""""""" + +.. code-block:: llvm + + %X = sext i8 -1 to i16 ; yields i16 :65535 + %Y = sext i1 true to i32 ; yields i32:-1 + %Z = sext <2 x i16> to <2 x i32> ; yields + +'``fptrunc .. 
to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = fptrunc to ; yields ty2 + +Overview: +""""""""" + +The '``fptrunc``' instruction truncates ``value`` to type ``ty2``. + +Arguments: +"""""""""" + +The '``fptrunc``' instruction takes a :ref:`floating point ` +value to cast and a :ref:`floating point ` type to cast it to. +The size of ``value`` must be larger than the size of ``ty2``. This +implies that ``fptrunc`` cannot be used to make a *no-op cast*. + +Semantics: +"""""""""" + +The '``fptrunc``' instruction truncates a ``value`` from a larger +:ref:`floating point ` type to a smaller :ref:`floating +point ` type. If the value cannot fit within the +destination type, ``ty2``, then the results are undefined. + +Example: +"""""""" + +.. code-block:: llvm + + %X = fptrunc double 123.0 to float ; yields float:123.0 + %Y = fptrunc double 1.0E+300 to float ; yields undefined + +'``fpext .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = fpext to ; yields ty2 + +Overview: +""""""""" + +The '``fpext``' extends a floating point ``value`` to a larger floating +point value. + +Arguments: +"""""""""" + +The '``fpext``' instruction takes a :ref:`floating point ` +``value`` to cast, and a :ref:`floating point ` type to cast it +to. The source type must be smaller than the destination type. + +Semantics: +"""""""""" + +The '``fpext``' instruction extends the ``value`` from a smaller +:ref:`floating point ` type to a larger :ref:`floating +point ` type. The ``fpext`` cannot be used to make a +*no-op cast* because it always changes bits. Use ``bitcast`` to make a +*no-op cast* for a floating point cast. + +Example: +"""""""" + +.. code-block:: llvm + + %X = fpext float 3.125 to double ; yields double:3.125000e+00 + %Y = fpext double %X to fp128 ; yields fp128:0xL00000000000000004000900000000000 + +'``fptoui .. 
to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = fptoui <ty> <value> to <ty2> ; yields ty2 + +Overview: +""""""""" + +The '``fptoui``' converts a floating point ``value`` to its unsigned +integer equivalent of type ``ty2``. + +Arguments: +"""""""""" + +The '``fptoui``' instruction takes a value to cast, which must be a +scalar or vector :ref:`floating point <t_floating>` value, and a type to +cast it to ``ty2``, which must be an :ref:`integer <t_integer>` type. If +``ty`` is a vector floating point type, ``ty2`` must be a vector integer +type with the same number of elements as ``ty``. + +Semantics: +"""""""""" + +The '``fptoui``' instruction converts its :ref:`floating +point <t_floating>` operand into the nearest (rounding towards zero) +unsigned integer value. If the value cannot fit in ``ty2``, the results +are undefined. + +Example: +"""""""" + +.. code-block:: llvm + + %X = fptoui double 123.0 to i32 ; yields i32:123 + %Y = fptoui float 1.0E+300 to i1 ; yields undefined:1 + %Z = fptoui float 1.04E+17 to i8 ; yields undefined:1 + +'``fptosi .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = fptosi <ty> <value> to <ty2> ; yields ty2 + +Overview: +""""""""" + +The '``fptosi``' instruction converts :ref:`floating point <t_floating>` +``value`` to type ``ty2``. + +Arguments: +"""""""""" + +The '``fptosi``' instruction takes a value to cast, which must be a +scalar or vector :ref:`floating point <t_floating>` value, and a type to +cast it to ``ty2``, which must be an :ref:`integer <t_integer>` type. If +``ty`` is a vector floating point type, ``ty2`` must be a vector integer +type with the same number of elements as ``ty``. + +Semantics: +"""""""""" + +The '``fptosi``' instruction converts its :ref:`floating +point <t_floating>` operand into the nearest (rounding towards zero) +signed integer value. If the value cannot fit in ``ty2``, the results +are undefined. + +Example: +"""""""" + +..
code-block:: llvm + + %X = fptosi double -123.0 to i32 ; yields i32:-123 + %Y = fptosi float 1.0E-247 to i1 ; yields undefined:1 + %Z = fptosi float 1.04E+17 to i8 ; yields undefined:1 + +'``uitofp .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = uitofp <ty> <value> to <ty2> ; yields ty2 + +Overview: +""""""""" + +The '``uitofp``' instruction regards ``value`` as an unsigned integer +and converts that value to the ``ty2`` type. + +Arguments: +"""""""""" + +The '``uitofp``' instruction takes a value to cast, which must be a +scalar or vector :ref:`integer <t_integer>` value, and a type to cast it to +``ty2``, which must be a :ref:`floating point <t_floating>` type. If +``ty`` is a vector integer type, ``ty2`` must be a vector floating point +type with the same number of elements as ``ty``. + +Semantics: +"""""""""" + +The '``uitofp``' instruction interprets its operand as an unsigned +integer quantity and converts it to the corresponding floating point +value. If the value cannot fit in the floating point value, the results +are undefined. + +Example: +"""""""" + +.. code-block:: llvm + + %X = uitofp i32 257 to float ; yields float:257.0 + %Y = uitofp i8 -1 to double ; yields double:255.0 + +'``sitofp .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = sitofp <ty> <value> to <ty2> ; yields ty2 + +Overview: +""""""""" + +The '``sitofp``' instruction regards ``value`` as a signed integer and +converts that value to the ``ty2`` type. + +Arguments: +"""""""""" + +The '``sitofp``' instruction takes a value to cast, which must be a +scalar or vector :ref:`integer <t_integer>` value, and a type to cast it to +``ty2``, which must be a :ref:`floating point <t_floating>` type. If +``ty`` is a vector integer type, ``ty2`` must be a vector floating point +type with the same number of elements as ``ty``. + +Semantics: +"""""""""" + +The '``sitofp``' instruction interprets its operand as a signed integer +quantity and converts it to the corresponding floating point value.
If +the value cannot fit in the floating point value, the results are +undefined. + +Example: +"""""""" + +.. code-block:: llvm + + %X = sitofp i32 257 to float ; yields float:257.0 + %Y = sitofp i8 -1 to double ; yields double:-1.0 + +.. _i_ptrtoint: + +'``ptrtoint .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = ptrtoint <ty> <value> to <ty2> ; yields ty2 + +Overview: +""""""""" + +The '``ptrtoint``' instruction converts the pointer or a vector of +pointers ``value`` to the integer (or vector of integers) type ``ty2``. + +Arguments: +"""""""""" + +The '``ptrtoint``' instruction takes a ``value`` to cast, which must be +a value of type :ref:`pointer <t_pointer>` or a vector of pointers, and a +type to cast it to ``ty2``, which must be an :ref:`integer <t_integer>` or +a vector of integers type. + +Semantics: +"""""""""" + +The '``ptrtoint``' instruction converts ``value`` to integer type +``ty2`` by interpreting the pointer value as an integer and either +truncating or zero extending that value to the size of the integer type. +If ``value`` is smaller than ``ty2`` then a zero extension is done. If +``value`` is larger than ``ty2`` then a truncation is done. If they are +the same size, then nothing is done (*no-op cast*) other than a type +change. + +Example: +"""""""" + +.. code-block:: llvm + + %X = ptrtoint i32* %P to i8 ; yields truncation on 32-bit architecture + %Y = ptrtoint i32* %P to i64 ; yields zero extension on 32-bit architecture + %Z = ptrtoint <4 x i32*> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture + +.. _i_inttoptr: + +'``inttoptr .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = inttoptr <ty> <value> to <ty2> ; yields ty2 + +Overview: +""""""""" + +The '``inttoptr``' instruction converts an integer ``value`` to a +pointer type, ``ty2``.
+ +Arguments: +"""""""""" + +The '``inttoptr``' instruction takes an :ref:`integer ` value to +cast, and a type to cast it to, which must be a :ref:`pointer ` +type. + +Semantics: +"""""""""" + +The '``inttoptr``' instruction converts ``value`` to type ``ty2`` by +applying either a zero extension or a truncation depending on the size +of the integer ``value``. If ``value`` is larger than the size of a +pointer then a truncation is done. If ``value`` is smaller than the size +of a pointer then a zero extension is done. If they are the same size, +nothing is done (*no-op cast*). + +Example: +"""""""" + +.. code-block:: llvm + + %X = inttoptr i32 255 to i32* ; yields zero extension on 64-bit architecture + %Y = inttoptr i32 255 to i32* ; yields no-op on 32-bit architecture + %Z = inttoptr i64 0 to i32* ; yields truncation on 32-bit architecture + %Z = inttoptr <4 x i32> %G to <4 x i8*>; yields truncation of vector G to four pointers + +.. _i_bitcast: + +'``bitcast .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = bitcast to ; yields ty2 + +Overview: +""""""""" + +The '``bitcast``' instruction converts ``value`` to type ``ty2`` without +changing any bits. + +Arguments: +"""""""""" + +The '``bitcast``' instruction takes a value to cast, which must be a +non-aggregate first class value, and a type to cast it to, which must +also be a non-aggregate :ref:`first class ` type. The bit +sizes of ``value`` and the destination type, ``ty2``, must be identical. +If the source type is a pointer, the destination type must also be a +pointer. This instruction supports bitwise conversion of vectors to +integers and to vectors of other types (as long as they have the same +size). + +Semantics: +"""""""""" + +The '``bitcast``' instruction converts ``value`` to type ``ty2``. It is +always a *no-op cast* because no bits change with this conversion. The +conversion is done as if the ``value`` had been stored to memory and +read back as type ``ty2``. 
Pointer (or vector of pointers) types may +only be converted to other pointer (or vector of pointers) types with +this instruction. To convert pointers to other types, use the +:ref:`inttoptr <i_inttoptr>` or :ref:`ptrtoint <i_ptrtoint>` instructions +first. + +Example: +"""""""" + +.. code-block:: llvm + + %X = bitcast i8 255 to i8 ; yields i8 :-1 + %Y = bitcast i32* %x to i16* ; yields i16*:%x + %Z = bitcast <2 x i32> %V to i64 ; yields i64: %V + %Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*> + +.. _otherops: + +Other Operations +---------------- + +The instructions in this category are the "miscellaneous" instructions, +which defy better classification. + +.. _i_icmp: + +'``icmp``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = icmp <cond> <ty> <op1>, <op2> ; yields {i1} or {<N x i1>}:result + +Overview: +""""""""" + +The '``icmp``' instruction returns a boolean value or a vector of +boolean values based on comparison of its two integer, integer vector, +pointer, or pointer vector operands. + +Arguments: +"""""""""" + +The '``icmp``' instruction takes three operands. The first operand is +the condition code indicating the kind of comparison to perform. It is +not a value, just a keyword. The possible condition codes are: + +#. ``eq``: equal +#. ``ne``: not equal +#. ``ugt``: unsigned greater than +#. ``uge``: unsigned greater or equal +#. ``ult``: unsigned less than +#. ``ule``: unsigned less or equal +#. ``sgt``: signed greater than +#. ``sge``: signed greater or equal +#. ``slt``: signed less than +#. ``sle``: signed less or equal + +The remaining two arguments must be :ref:`integer <t_integer>` or +:ref:`pointer <t_pointer>` or integer :ref:`vector <t_vector>` typed. They +must also be identical types. + +Semantics: +"""""""""" + +The '``icmp``' compares ``op1`` and ``op2`` according to the condition +code given as ``cond``. The comparison performed always yields either an +:ref:`i1 <t_integer>` or vector of ``i1`` result, as follows: + +#. ``eq``: yields ``true`` if the operands are equal, ``false`` + otherwise.
No sign interpretation is necessary or performed. +#. ``ne``: yields ``true`` if the operands are unequal, ``false`` + otherwise. No sign interpretation is necessary or performed. +#. ``ugt``: interprets the operands as unsigned values and yields + ``true`` if ``op1`` is greater than ``op2``. +#. ``uge``: interprets the operands as unsigned values and yields + ``true`` if ``op1`` is greater than or equal to ``op2``. +#. ``ult``: interprets the operands as unsigned values and yields + ``true`` if ``op1`` is less than ``op2``. +#. ``ule``: interprets the operands as unsigned values and yields + ``true`` if ``op1`` is less than or equal to ``op2``. +#. ``sgt``: interprets the operands as signed values and yields ``true`` + if ``op1`` is greater than ``op2``. +#. ``sge``: interprets the operands as signed values and yields ``true`` + if ``op1`` is greater than or equal to ``op2``. +#. ``slt``: interprets the operands as signed values and yields ``true`` + if ``op1`` is less than ``op2``. +#. ``sle``: interprets the operands as signed values and yields ``true`` + if ``op1`` is less than or equal to ``op2``. + +If the operands are :ref:`pointer ` typed, the pointer values +are compared as if they were integers. + +If the operands are integer vectors, then they are compared element by +element. The result is an ``i1`` vector with the same number of elements +as the values being compared. Otherwise, the result is an ``i1``. + +Example: +"""""""" + +.. code-block:: llvm + + = icmp eq i32 4, 5 ; yields: result=false + = icmp ne float* %X, %X ; yields: result=false + = icmp ult i16 4, 5 ; yields: result=true + = icmp sgt i16 4, 5 ; yields: result=false + = icmp ule i16 -4, 5 ; yields: result=false + = icmp sge i16 4, 5 ; yields: result=false + +Note that the code generator does not yet support vector types with the +``icmp`` instruction. + +.. 
_i_fcmp: + +'``fcmp``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <result> = fcmp <cond> <ty> <op1>, <op2> ; yields {i1} or {<N x i1>}:result + +Overview: +""""""""" + +The '``fcmp``' instruction returns a boolean value or vector of boolean +values based on comparison of its operands. + +If the operands are floating point scalars, then the result type is a +boolean (:ref:`i1 <t_integer>`). + +If the operands are floating point vectors, then the result type is a +vector of booleans with the same number of elements as the operands being +compared. + +Arguments: +"""""""""" + +The '``fcmp``' instruction takes three operands. The first operand is +the condition code indicating the kind of comparison to perform. It is +not a value, just a keyword. The possible condition codes are: + +#. ``false``: no comparison, always returns false +#. ``oeq``: ordered and equal +#. ``ogt``: ordered and greater than +#. ``oge``: ordered and greater than or equal +#. ``olt``: ordered and less than +#. ``ole``: ordered and less than or equal +#. ``one``: ordered and not equal +#. ``ord``: ordered (no nans) +#. ``ueq``: unordered or equal +#. ``ugt``: unordered or greater than +#. ``uge``: unordered or greater than or equal +#. ``ult``: unordered or less than +#. ``ule``: unordered or less than or equal +#. ``une``: unordered or not equal +#. ``uno``: unordered (either nans) +#. ``true``: no comparison, always returns true + +*Ordered* means that neither operand is a QNAN while *unordered* means +that either operand may be a QNAN. + +Each of the ``op1`` and ``op2`` arguments must be either a :ref:`floating +point <t_floating>` type or a :ref:`vector <t_vector>` of floating point +type. They must have identical types. + +Semantics: +"""""""""" + +The '``fcmp``' instruction compares ``op1`` and ``op2`` according to the +condition code given as ``cond``. If the operands are vectors, then the +vectors are compared element by element. Each comparison performed +always yields an :ref:`i1 <t_integer>` result, as follows: + +#.
``false``: always yields ``false``, regardless of operands. +#. ``oeq``: yields ``true`` if both operands are not a QNAN and ``op1`` + is equal to ``op2``. +#. ``ogt``: yields ``true`` if both operands are not a QNAN and ``op1`` + is greater than ``op2``. +#. ``oge``: yields ``true`` if both operands are not a QNAN and ``op1`` + is greater than or equal to ``op2``. +#. ``olt``: yields ``true`` if both operands are not a QNAN and ``op1`` + is less than ``op2``. +#. ``ole``: yields ``true`` if both operands are not a QNAN and ``op1`` + is less than or equal to ``op2``. +#. ``one``: yields ``true`` if both operands are not a QNAN and ``op1`` + is not equal to ``op2``. +#. ``ord``: yields ``true`` if both operands are not a QNAN. +#. ``ueq``: yields ``true`` if either operand is a QNAN or ``op1`` is + equal to ``op2``. +#. ``ugt``: yields ``true`` if either operand is a QNAN or ``op1`` is + greater than ``op2``. +#. ``uge``: yields ``true`` if either operand is a QNAN or ``op1`` is + greater than or equal to ``op2``. +#. ``ult``: yields ``true`` if either operand is a QNAN or ``op1`` is + less than ``op2``. +#. ``ule``: yields ``true`` if either operand is a QNAN or ``op1`` is + less than or equal to ``op2``. +#. ``une``: yields ``true`` if either operand is a QNAN or ``op1`` is + not equal to ``op2``. +#. ``uno``: yields ``true`` if either operand is a QNAN. +#. ``true``: always yields ``true``, regardless of operands. + +Example: +"""""""" + +.. code-block:: llvm + + = fcmp oeq float 4.0, 5.0 ; yields: result=false + = fcmp one float 4.0, 5.0 ; yields: result=true + = fcmp olt float 4.0, 5.0 ; yields: result=true + = fcmp ueq double 1.0, 2.0 ; yields: result=false + +Note that the code generator does not yet support vector types with the +``fcmp`` instruction. + +.. _i_phi: + +'``phi``' Instruction +^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = phi [ , ], ... 
+ +Overview: +""""""""" + +The '``phi``' instruction is used to implement the φ node in the SSA +graph representing the function. + +Arguments: +"""""""""" + +The type of the incoming values is specified with the first type field. +After this, the '``phi``' instruction takes a list of pairs as +arguments, with one pair for each predecessor basic block of the current +block. Only values of :ref:`first class ` type may be used as +the value arguments to the PHI node. Only labels may be used as the +label arguments. + +There must be no non-phi instructions between the start of a basic block +and the PHI instructions: i.e. PHI instructions must be first in a basic +block. + +For the purposes of the SSA form, the use of each incoming value is +deemed to occur on the edge from the corresponding predecessor block to +the current block (but after any definition of an '``invoke``' +instruction's return value on the same edge). + +Semantics: +"""""""""" + +At runtime, the '``phi``' instruction logically takes on the value +specified by the pair corresponding to the predecessor basic block that +executed just prior to the current block. + +Example: +"""""""" + +.. code-block:: llvm + + Loop: ; Infinite loop that counts from 0 on up... + %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ] + %nextindvar = add i32 %indvar, 1 + br label %Loop + +.. _i_select: + +'``select``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = select selty , , ; yields ty + + selty is either i1 or {} + +Overview: +""""""""" + +The '``select``' instruction is used to choose one value based on a +condition, without branching. + +Arguments: +"""""""""" + +The '``select``' instruction requires an 'i1' value or a vector of 'i1' +values indicating the condition, and two values of the same :ref:`first +class ` type. If the val1/val2 are vectors and the +condition is a scalar, then entire vectors are selected, not individual +elements. 
+ +Semantics: +"""""""""" + +If the condition is an i1 and it evaluates to 1, the instruction returns +the first value argument; otherwise, it returns the second value +argument. + +If the condition is a vector of i1, then the value arguments must be +vectors of the same size, and the selection is done element by element. + +Example: +"""""""" + +.. code-block:: llvm + + %X = select i1 true, i8 17, i8 42 ; yields i8:17 + +.. _i_call: + +'``call``' Instruction +^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = [tail] call [cconv] [ret attrs] [*] () [fn attrs] + +Overview: +""""""""" + +The '``call``' instruction represents a simple function call. + +Arguments: +"""""""""" + +This instruction requires several arguments: + +#. The optional "tail" marker indicates that the callee function does + not access any allocas or varargs in the caller. Note that calls may + be marked "tail" even if they do not occur before a + :ref:`ret ` instruction. If the "tail" marker is present, the + function call is eligible for tail call optimization, but `might not + in fact be optimized into a jump `_. + The code generator may optimize calls marked "tail" with either 1) + automatic `sibling call + optimization `_ when the caller and + callee have matching signatures, or 2) forced tail call optimization + when the following extra requirements are met: + + - Caller and callee both have the calling convention ``fastcc``. + - The call is in tail position (ret immediately follows call and ret + uses value of call or is void). + - Option ``-tailcallopt`` is enabled, or + ``llvm::GuaranteedTailCallOpt`` is ``true``. + - `Platform specific constraints are + met. `_ + +#. The optional "cconv" marker indicates which :ref:`calling + convention ` the call should use. If none is + specified, the call defaults to using C calling conventions. The + calling convention of the call must match the calling convention of + the target function, or else the behavior is undefined. +#. 
The optional :ref:`Parameter Attributes ` list for return + values. Only '``zeroext``', '``signext``', and '``inreg``' attributes + are valid here. +#. '``ty``': the type of the call instruction itself which is also the + type of the return value. Functions that return no value are marked + ``void``. +#. '``fnty``': shall be the signature of the pointer to function value + being invoked. The argument types must match the types implied by + this signature. This type can be omitted if the function is not + varargs and if the function type does not return a pointer to a + function. +#. '``fnptrval``': An LLVM value containing a pointer to a function to + be invoked. In most cases, this is a direct function invocation, but + indirect ``call``'s are just as possible, calling an arbitrary pointer + to function value. +#. '``function args``': argument list whose types match the function + signature argument types and parameter attributes. All arguments must + be of :ref:`first class ` type. If the function signature + indicates the function accepts a variable number of arguments, the + extra arguments can be specified. +#. The optional :ref:`function attributes ` list. Only + '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``' + attributes are valid here. + +Semantics: +"""""""""" + +The '``call``' instruction is used to cause control flow to transfer to +a specified function, with its incoming arguments bound to the specified +values. Upon a '``ret``' instruction in the called function, control +flow continues with the instruction after the function call, and the +return value of the function is bound to the result argument. + +Example: +"""""""" + +.. 
code-block:: llvm + + %retval = call i32 @test(i32 %argc) + call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42) ; yields i32 + %X = tail call i32 @foo() ; yields i32 + %Y = tail call fastcc i32 @foo() ; yields i32 + call void %foo(i8 97 signext) + + %struct.A = type { i32, i8 } + %r = call %struct.A @foo() ; yields { i32, i8 } + %gr = extractvalue %struct.A %r, 0 ; yields i32 + %gr1 = extractvalue %struct.A %r, 1 ; yields i8 + call void @foo() noreturn ; indicates that @foo never returns normally + %ZZ = call zeroext i32 @bar() ; Return value is zero extended + +LLVM treats calls to some functions with names and arguments that match +the standard C99 library as being the C99 library functions, and may +perform optimizations or generate code for them under that assumption. +This is something we'd like to change in the future to provide better +support for freestanding environments and non-C-based languages. + +.. _i_va_arg: + +'``va_arg``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + <resultval> = va_arg <va_list*> <arglist>, <argty> + +Overview: +""""""""" + +The '``va_arg``' instruction is used to access arguments passed through +the "variable argument" area of a function call. It is used to implement +the ``va_arg`` macro in C. + +Arguments: +"""""""""" + +This instruction takes a ``va_list*`` value and the type of the +argument. It returns a value of the specified argument type and +increments the ``va_list`` to point to the next argument. The actual +type of ``va_list`` is target specific. + +Semantics: +"""""""""" + +The '``va_arg``' instruction loads an argument of the specified type +from the specified ``va_list`` and causes the ``va_list`` to point to +the next argument. For more information, see the variable argument +handling :ref:`Intrinsic Functions <int_varargs>`. + +It is legal for this instruction to be called in a function which does +not take a variable number of arguments, for example, the ``vfprintf`` +function.
+ +``va_arg`` is an LLVM instruction instead of an :ref:`intrinsic +function ` because it takes a type as an argument. + +Example: +"""""""" + +See the :ref:`variable argument processing ` section. + +Note that the code generator does not yet fully support va\_arg on many +targets. Also, it does not currently support va\_arg with aggregate +types on any target. + +.. _i_landingpad: + +'``landingpad``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = landingpad personality + + = landingpad personality cleanup * + + := catch + := filter + +Overview: +""""""""" + +The '``landingpad``' instruction is used by `LLVM's exception handling +system `_ to specify that a basic block +is a landing pad --- one where the exception lands, and corresponds to the +code found in the ``catch`` portion of a ``try``/``catch`` sequence. It +defines values supplied by the personality function (``pers_fn``) upon +re-entry to the function. The ``resultval`` has the type ``resultty``. + +Arguments: +"""""""""" + +This instruction takes a ``pers_fn`` value. This is the personality +function associated with the unwinding mechanism. The optional +``cleanup`` flag indicates that the landing pad block is a cleanup. + +A ``clause`` begins with the clause type --- ``catch`` or ``filter`` --- and +contains the global variable representing the "type" that may be caught +or filtered respectively. Unlike the ``catch`` clause, the ``filter`` +clause takes an array constant as its argument. Use +"``[0 x i8**] undef``" for a filter which cannot throw. The +'``landingpad``' instruction must contain *at least* one ``clause`` or +the ``cleanup`` flag. + +Semantics: +"""""""""" + +The '``landingpad``' instruction defines the values which are set by the +personality function (``pers_fn``) upon re-entry to the function, and +therefore the "result type" of the ``landingpad`` instruction. 
As with +calling conventions, how the personality function results are +represented in LLVM IR is target specific. + +The clauses are applied in order from top to bottom. If two +``landingpad`` instructions are merged together through inlining, the +clauses from the calling function are appended to the list of clauses. +When the call stack is being unwound due to an exception being thrown, +the exception is compared against each ``clause`` in turn. If it doesn't +match any of the clauses, and the ``cleanup`` flag is not set, then +unwinding continues further up the call stack. + +The ``landingpad`` instruction has several restrictions: + +- A landing pad block is a basic block which is the unwind destination + of an '``invoke``' instruction. +- A landing pad block must have a '``landingpad``' instruction as its + first non-PHI instruction. +- There can be only one '``landingpad``' instruction within the landing + pad block. +- A basic block that is not a landing pad block may not include a + '``landingpad``' instruction. +- All '``landingpad``' instructions in a function must have the same + personality function. + +Example: +"""""""" + +.. code-block:: llvm + + ;; A landing pad which can catch an integer. + %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8** @_ZTIi + ;; A landing pad that is a cleanup. + %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + cleanup + ;; A landing pad which can catch an integer and can only throw a double. + %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8** @_ZTIi + filter [1 x i8**] [@_ZTId] + +.. _intrinsics: + +Intrinsic Functions +=================== + +LLVM supports the notion of an "intrinsic function". These functions +have well known names and semantics and are required to follow certain +restrictions. 
Overall, these intrinsics represent an extension mechanism +for the LLVM language that does not require changing all of the +transformations in LLVM when adding to the language (or the bitcode +reader/writer, the parser, etc...). + +Intrinsic function names must all start with an "``llvm.``" prefix. This +prefix is reserved in LLVM for intrinsic names; thus, function names may +not begin with this prefix. Intrinsic functions must always be external +functions: you cannot define the body of intrinsic functions. Intrinsic +functions may only be used in call or invoke instructions: it is illegal +to take the address of an intrinsic function. Additionally, because +intrinsic functions are part of the LLVM language, it is required if any +are added that they be documented here. + +Some intrinsic functions can be overloaded, i.e., the intrinsic +represents a family of functions that perform the same operation but on +different data types. Because LLVM can represent over 8 million +different integer types, overloading is used commonly to allow an +intrinsic function to operate on any integer type. One or more of the +argument types or the result type can be overloaded to accept any +integer type. Argument types may also be defined as exactly matching a +previous argument's type or the result type. This allows an intrinsic +function which accepts multiple arguments, but needs all of them to be +of the same type, to only be overloaded with respect to a single +argument or the result. + +An overloaded intrinsic will have the names of its overloaded argument +types encoded into its function name, each preceded by a period. Only +those types which are overloaded result in a name suffix. Arguments +whose type is matched against another type do not. For example, the +``llvm.ctpop`` function can take an integer of any width and returns an +integer of exactly the same integer width.
This leads to a family of +functions such as ``i8 @llvm.ctpop.i8(i8 %val)`` and +``i29 @llvm.ctpop.i29(i29 %val)``. Only one type, the return type, is +overloaded, and only one type suffix is required. Because the argument's +type is matched against the return type, it does not require its own +name suffix. + +To learn how to add an intrinsic function, please see the `Extending +LLVM Guide `_. + +.. _int_varargs: + +Variable Argument Handling Intrinsics +------------------------------------- + +Variable argument support is defined in LLVM with the +:ref:`va_arg ` instruction and these three intrinsic +functions. These functions are related to the similarly named macros +defined in the ```` header file. + +All of these functions operate on arguments that use a target-specific +value type "``va_list``". The LLVM assembly language reference manual +does not define what this type is, so all transformations should be +prepared to handle these functions regardless of the type used. + +This example shows how the :ref:`va_arg ` instruction and the +variable argument handling intrinsic functions are used. + +.. code-block:: llvm + + define i32 @test(i32 %X, ...) { + ; Initialize variable argument processing + %ap = alloca i8* + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + ; Read a single integer argument + %tmp = va_arg i8** %ap, i32 + + ; Demonstrate usage of llvm.va_copy and llvm.va_end + %aq = alloca i8* + %aq2 = bitcast i8** %aq to i8* + call void @llvm.va_copy(i8* %aq2, i8* %ap2) + call void @llvm.va_end(i8* %aq2) + + ; Stop processing of arguments. + call void @llvm.va_end(i8* %ap2) + ret i32 %tmp + } + + declare void @llvm.va_start(i8*) + declare void @llvm.va_copy(i8*, i8*) + declare void @llvm.va_end(i8*) + +.. 
_int_va_start: + +'``llvm.va_start``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.va_start(i8* <arglist>) + +Overview: +""""""""" + +The '``llvm.va_start``' intrinsic initializes ``*<arglist>`` for +subsequent use by ``va_arg``. + +Arguments: +"""""""""" + +The argument is a pointer to a ``va_list`` element to initialize. + +Semantics: +"""""""""" + +The '``llvm.va_start``' intrinsic works just like the ``va_start`` macro +available in C. In a target-dependent way, it initializes the +``va_list`` element to which the argument points, so that the next call +to ``va_arg`` will produce the first variable argument passed to the +function. Unlike the C ``va_start`` macro, this intrinsic does not need +to know the last argument of the function as the compiler can figure +that out. + +'``llvm.va_end``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.va_end(i8* <arglist>) + +Overview: +""""""""" + +The '``llvm.va_end``' intrinsic destroys ``*<arglist>``, which has been +initialized previously with ``llvm.va_start`` or ``llvm.va_copy``. + +Arguments: +"""""""""" + +The argument is a pointer to a ``va_list`` to destroy. + +Semantics: +"""""""""" + +The '``llvm.va_end``' intrinsic works just like the ``va_end`` macro +available in C. In a target-dependent way, it destroys the ``va_list`` +element to which the argument points. Calls to +:ref:`llvm.va_start <int_va_start>` and +:ref:`llvm.va_copy <int_va_copy>` must be matched exactly with calls to +``llvm.va_end``. + +.. _int_va_copy: + +'``llvm.va_copy``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.va_copy(i8* <destarglist>, i8* <srcarglist>) + +Overview: +""""""""" + +The '``llvm.va_copy``' intrinsic copies the current argument position +from the source argument list to the destination argument list. + +Arguments: +"""""""""" + +The first argument is a pointer to a ``va_list`` element to initialize. +The second argument is a pointer to a ``va_list`` element to copy from.
+ +Semantics: +"""""""""" + +The '``llvm.va_copy``' intrinsic works just like the ``va_copy`` macro +available in C. In a target-dependent way, it copies the source +``va_list`` element into the destination ``va_list`` element. This +intrinsic is necessary because the ``llvm.va_start`` intrinsic may be +arbitrarily complex and require, for example, memory allocation. + +Accurate Garbage Collection Intrinsics +-------------------------------------- + +LLVM support for `Accurate Garbage Collection <GarbageCollection.html>`_ +(GC) requires the implementation and generation of these intrinsics. +These intrinsics allow identification of :ref:`GC roots on the +stack <int_gcroot>`, as well as garbage collector implementations that +require :ref:`read <int_gcread>` and :ref:`write <int_gcwrite>` barriers. +Front-ends for type-safe garbage collected languages should generate +these intrinsics to make use of the LLVM garbage collectors. For more +details, see `Accurate Garbage Collection with +LLVM <GarbageCollection.html>`_. + +The garbage collection intrinsics only operate on objects in the generic +address space (address space zero). + +.. _int_gcroot: + +'``llvm.gcroot``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata) + +Overview: +""""""""" + +The '``llvm.gcroot``' intrinsic declares the existence of a GC root to +the code generator, and allows some metadata to be associated with it. + +Arguments: +"""""""""" + +The first argument specifies the address of a stack object that contains +the root pointer. The second pointer (which must be either a constant or +a global value address) contains the meta-data to be associated with the +root. + +Semantics: +"""""""""" + +At runtime, a call to this intrinsic stores a null pointer into the +"ptrloc" location. At compile-time, the code generator generates +information to allow the runtime to find the pointer at GC safe points. +The '``llvm.gcroot``' intrinsic may only be used in a function which +:ref:`specifies a GC algorithm <gc>`. + +..
_int_gcread: + +'``llvm.gcread``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr) + +Overview: +""""""""" + +The '``llvm.gcread``' intrinsic identifies reads of references from heap +locations, allowing garbage collector implementations that require read +barriers. + +Arguments: +"""""""""" + +The second argument is the address to read from, which should be an +address allocated from the garbage collector. The first object is a +pointer to the start of the referenced object, if needed by the language +runtime (otherwise null). + +Semantics: +"""""""""" + +The '``llvm.gcread``' intrinsic has the same semantics as a load +instruction, but may be replaced with substantially more complex code by +the garbage collector runtime, as needed. The '``llvm.gcread``' +intrinsic may only be used in a function which :ref:`specifies a GC +algorithm `. + +.. _int_gcwrite: + +'``llvm.gcwrite``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2) + +Overview: +""""""""" + +The '``llvm.gcwrite``' intrinsic identifies writes of references to heap +locations, allowing garbage collector implementations that require write +barriers (such as generational or reference counting collectors). + +Arguments: +"""""""""" + +The first argument is the reference to store, the second is the start of +the object to store it to, and the third is the address of the field of +Obj to store to. If the runtime does not require a pointer to the +object, Obj may be null. + +Semantics: +"""""""""" + +The '``llvm.gcwrite``' intrinsic has the same semantics as a store +instruction, but may be replaced with substantially more complex code by +the garbage collector runtime, as needed. The '``llvm.gcwrite``' +intrinsic may only be used in a function which :ref:`specifies a GC +algorithm `. 
+ +Code Generator Intrinsics +------------------------- + +These intrinsics are provided by LLVM to expose special features that +may only be implemented with code generator support. + +'``llvm.returnaddress``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8 *@llvm.returnaddress(i32 ) + +Overview: +""""""""" + +The '``llvm.returnaddress``' intrinsic attempts to compute a +target-specific value indicating the return address of the current +function or one of its callers. + +Arguments: +"""""""""" + +The argument to this intrinsic indicates which function to return the +address for. Zero indicates the calling function, one indicates its +caller, etc. The argument is **required** to be a constant integer +value. + +Semantics: +"""""""""" + +The '``llvm.returnaddress``' intrinsic either returns a pointer +indicating the return address of the specified call frame, or zero if it +cannot be identified. The value returned by this intrinsic is likely to +be incorrect or 0 for arguments other than zero, so it should only be +used for debugging purposes. + +Note that calling this intrinsic does not prevent function inlining or +other aggressive transformations, so the value returned may not be that +of the obvious source-language caller. + +'``llvm.frameaddress``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.frameaddress(i32 ) + +Overview: +""""""""" + +The '``llvm.frameaddress``' intrinsic attempts to return the +target-specific frame pointer value for the specified stack frame. + +Arguments: +"""""""""" + +The argument to this intrinsic indicates which function to return the +frame pointer for. Zero indicates the calling function, one indicates +its caller, etc. The argument is **required** to be a constant integer +value. 
+ +Semantics: +"""""""""" + +The '``llvm.frameaddress``' intrinsic either returns a pointer +indicating the frame address of the specified call frame, or zero if it +cannot be identified. The value returned by this intrinsic is likely to +be incorrect or 0 for arguments other than zero, so it should only be +used for debugging purposes. + +Note that calling this intrinsic does not prevent function inlining or +other aggressive transformations, so the value returned may not be that +of the obvious source-language caller. + +.. _int_stacksave: + +'``llvm.stacksave``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.stacksave() + +Overview: +""""""""" + +The '``llvm.stacksave``' intrinsic is used to remember the current state +of the function stack, for use with +:ref:`llvm.stackrestore <int_stackrestore>`. This is useful for +implementing language features like scoped automatic variable sized +arrays in C99. + +Semantics: +"""""""""" + +This intrinsic returns an opaque pointer value that can be passed to +:ref:`llvm.stackrestore <int_stackrestore>`. When an +``llvm.stackrestore`` intrinsic is executed with a value saved from +``llvm.stacksave``, it effectively restores the state of the stack to +the state it was in when the ``llvm.stacksave`` intrinsic executed. In +practice, this pops any :ref:`alloca <i_alloca>` blocks from the stack that +were allocated after the ``llvm.stacksave`` was executed. + +.. _int_stackrestore: + +'``llvm.stackrestore``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.stackrestore(i8* %ptr) + +Overview: +""""""""" + +The '``llvm.stackrestore``' intrinsic is used to restore the state of +the function stack to the state it was in when the corresponding +:ref:`llvm.stacksave <int_stacksave>` intrinsic executed. This is +useful for implementing language features like scoped automatic variable +sized arrays in C99. + +Semantics: +"""""""""" + +See the description for :ref:`llvm.stacksave <int_stacksave>`.
+ +'``llvm.prefetch``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.prefetch(i8*
, i32 , i32 , i32 ) + +Overview: +""""""""" + +The '``llvm.prefetch``' intrinsic is a hint to the code generator to +insert a prefetch instruction if supported; otherwise, it is a noop. +Prefetches have no effect on the behavior of the program but can change +its performance characteristics. + +Arguments: +"""""""""" + +``address`` is the address to be prefetched, ``rw`` is the specifier +determining if the fetch should be for a read (0) or write (1), and +``locality`` is a temporal locality specifier ranging from (0) - no +locality, to (3) - extremely local keep in cache. The ``cache type`` +specifies whether the prefetch is performed on the data (1) or +instruction (0) cache. The ``rw``, ``locality`` and ``cache type`` +arguments must be constant integers. + +Semantics: +"""""""""" + +This intrinsic does not modify the behavior of the program. In +particular, prefetches cannot trap and do not produce a value. On +targets that support this intrinsic, the prefetch can provide hints to +the processor cache for better performance. + +'``llvm.pcmarker``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.pcmarker(i32 ) + +Overview: +""""""""" + +The '``llvm.pcmarker``' intrinsic is a method to export a Program +Counter (PC) in a region of code to simulators and other tools. The +method is target specific, but it is expected that the marker will use +exported symbols to transmit the PC of the marker. The marker makes no +guarantees that it will remain with any specific instruction after +optimizations. It is possible that the presence of a marker will inhibit +optimizations. The intended use is to be inserted after optimizations to +allow correlations of simulation runs. + +Arguments: +"""""""""" + +``id`` is a numerical id identifying the marker. + +Semantics: +"""""""""" + +This intrinsic does not modify the behavior of the program. Backends +that do not support this intrinsic may ignore it. 
+ +'``llvm.readcyclecounter``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i64 @llvm.readcyclecounter() + +Overview: +""""""""" + +The '``llvm.readcyclecounter``' intrinsic provides access to the cycle +counter register (or similar low latency, high accuracy clocks) on those +targets that support it. On X86, it should map to RDTSC. On Alpha, it +should map to RPCC. As the backing counters overflow quickly (on the +order of 9 seconds on Alpha), this should only be used for small +timings. + +Semantics: +"""""""""" + +When directly supported, reading the cycle counter should not modify any +memory. Implementations are allowed to either return an application +specific value or a system wide value. On backends without support, this +is lowered to a constant 0. + +Standard C Library Intrinsics +----------------------------- + +LLVM provides intrinsics for a few important standard C library +functions. These intrinsics allow source-language front-ends to pass +information about the alignment of the pointer arguments to the code +generator, providing opportunity for more efficient code generation. + +.. _int_memcpy: + +'``llvm.memcpy``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.memcpy`` on any +integer bit width and for different address spaces. Not all targets +support all bit widths however. + +:: + + declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>, + i32 <len>, i32 <align>, i1 <isvolatile>) + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>, + i64 <len>, i32 <align>, i1 <isvolatile>) + +Overview: +""""""""" + +The '``llvm.memcpy.*``' intrinsics copy a block of memory from the +source location to the destination location. + +Note that, unlike the standard libc function, the ``llvm.memcpy.*`` +intrinsics do not return a value, take extra alignment/isvolatile +arguments and the pointers can be in specified address spaces.
+ +Arguments: +"""""""""" + +The first argument is a pointer to the destination, the second is a +pointer to the source. The third argument is an integer argument +specifying the number of bytes to copy, the fourth argument is the +alignment of the source and destination locations, and the fifth is a +boolean indicating a volatile access. + +If the call to this intrinsic has an alignment value that is not 0 or 1, +then the caller guarantees that both the source and destination pointers +are aligned to that boundary. + +If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy`` call is +a :ref:`volatile operation <volatile>`. The detailed access behavior is not +very cleanly specified and it is unwise to depend on it. + +Semantics: +"""""""""" + +The '``llvm.memcpy.*``' intrinsics copy a block of memory from the +source location to the destination location, which are not allowed to +overlap. It copies "len" bytes of memory over. If the argument is known +to be aligned to some boundary, this can be specified as the fourth +argument, otherwise it should be set to 0 or 1. + +'``llvm.memmove``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use llvm.memmove on any integer +bit width and for different address spaces. Not all targets support all +bit widths however. + +:: + + declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>, + i32 <len>, i32 <align>, i1 <isvolatile>) + declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>, + i64 <len>, i32 <align>, i1 <isvolatile>) + +Overview: +""""""""" + +The '``llvm.memmove.*``' intrinsics move a block of memory from the +source location to the destination location. It is similar to the +'``llvm.memcpy``' intrinsic but allows the two memory locations to +overlap. + +Note that, unlike the standard libc function, the ``llvm.memmove.*`` +intrinsics do not return a value, take extra alignment/isvolatile +arguments and the pointers can be in specified address spaces.
+ +Arguments: +"""""""""" + +The first argument is a pointer to the destination, the second is a +pointer to the source. The third argument is an integer argument +specifying the number of bytes to copy, the fourth argument is the +alignment of the source and destination locations, and the fifth is a +boolean indicating a volatile access. + +If the call to this intrinsic has an alignment value that is not 0 or 1, +then the caller guarantees that the source and destination pointers are +aligned to that boundary. + +If the ``isvolatile`` parameter is ``true``, the ``llvm.memmove`` call +is a :ref:`volatile operation `. The detailed access behavior is +not very cleanly specified and it is unwise to depend on it. + +Semantics: +"""""""""" + +The '``llvm.memmove.*``' intrinsics copy a block of memory from the +source location to the destination location, which may overlap. It +copies "len" bytes of memory over. If the argument is known to be +aligned to some boundary, this can be specified as the fourth argument, +otherwise it should be set to 0 or 1. + +'``llvm.memset.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use llvm.memset on any integer +bit width and for different address spaces. However, not all targets +support all bit widths. + +:: + + declare void @llvm.memset.p0i8.i32(i8* , i8 , + i32 , i32 , i1 ) + declare void @llvm.memset.p0i8.i64(i8* , i8 , + i64 , i32 , i1 ) + +Overview: +""""""""" + +The '``llvm.memset.*``' intrinsics fill a block of memory with a +particular byte value. + +Note that, unlike the standard libc function, the ``llvm.memset`` +intrinsic does not return a value and takes extra alignment/volatile +arguments. Also, the destination can be in an arbitrary address space. 
+ +Arguments: +"""""""""" + +The first argument is a pointer to the destination to fill, the second +is the byte value with which to fill it, the third argument is an +integer argument specifying the number of bytes to fill, and the fourth +argument is the known alignment of the destination location. + +If the call to this intrinsic has an alignment value that is not 0 or 1, +then the caller guarantees that the destination pointer is aligned to +that boundary. + +If the ``isvolatile`` parameter is ``true``, the ``llvm.memset`` call is +a :ref:`volatile operation `. The detailed access behavior is not +very cleanly specified and it is unwise to depend on it. + +Semantics: +"""""""""" + +The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting +at the destination location. If the argument is known to be aligned to +some boundary, this can be specified as the fourth argument, otherwise +it should be set to 0 or 1. + +'``llvm.sqrt.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.sqrt`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.sqrt.f32(float %Val) + declare double @llvm.sqrt.f64(double %Val) + declare x86_fp80 @llvm.sqrt.f80(x86_fp80 %Val) + declare fp128 @llvm.sqrt.f128(fp128 %Val) + declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.sqrt``' intrinsics return the sqrt of the specified operand, +returning the same value as the libm '``sqrt``' functions would. Unlike +``sqrt`` in libm, however, ``llvm.sqrt`` has undefined behavior for +negative numbers other than -0.0 (which allows for better optimization, +because there is no need to worry about errno being set). +``llvm.sqrt(-0.0)`` is defined to return -0.0 like IEEE sqrt. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. 
+ +Semantics: +"""""""""" + +This function returns the sqrt of the specified operand if it is a +nonnegative floating point number. + +'``llvm.powi.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.powi`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.powi.f32(float %Val, i32 %power) + declare double @llvm.powi.f64(double %Val, i32 %power) + declare x86_fp80 @llvm.powi.f80(x86_fp80 %Val, i32 %power) + declare fp128 @llvm.powi.f128(fp128 %Val, i32 %power) + declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128 %Val, i32 %power) + +Overview: +""""""""" + +The '``llvm.powi.*``' intrinsics return the first operand raised to the +specified (positive or negative) power. The order of evaluation of +multiplications is not defined. When a vector of floating point type is +used, the second argument remains a scalar integer value. + +Arguments: +"""""""""" + +The second argument is an integer power, and the first is a value to +raise to that power. + +Semantics: +"""""""""" + +This function returns the first value raised to the second power with an +unspecified sequence of rounding operations. + +'``llvm.sin.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.sin`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.sin.f32(float %Val) + declare double @llvm.sin.f64(double %Val) + declare x86_fp80 @llvm.sin.f80(x86_fp80 %Val) + declare fp128 @llvm.sin.f128(fp128 %Val) + declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.sin.*``' intrinsics return the sine of the operand. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. 
+ +Semantics: +"""""""""" + +This function returns the sine of the specified operand, returning the +same values as the libm ``sin`` functions would, and handles error +conditions in the same way. + +'``llvm.cos.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.cos`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.cos.f32(float %Val) + declare double @llvm.cos.f64(double %Val) + declare x86_fp80 @llvm.cos.f80(x86_fp80 %Val) + declare fp128 @llvm.cos.f128(fp128 %Val) + declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.cos.*``' intrinsics return the cosine of the operand. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the cosine of the specified operand, returning the +same values as the libm ``cos`` functions would, and handles error +conditions in the same way. + +'``llvm.pow.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.pow`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.pow.f32(float %Val, float %Power) + declare double @llvm.pow.f64(double %Val, double %Power) + declare x86_fp80 @llvm.pow.f80(x86_fp80 %Val, x86_fp80 %Power) + declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power) + declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128 %Val, ppc_fp128 %Power) + +Overview: +""""""""" + +The '``llvm.pow.*``' intrinsics return the first operand raised to the +specified (positive or negative) power. + +Arguments: +"""""""""" + +The second argument is a floating point power, and the first is a value +to raise to that power.
+ +Semantics: +"""""""""" + +This function returns the first value raised to the second power, +returning the same values as the libm ``pow`` functions would, and +handles error conditions in the same way. + +'``llvm.exp.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.exp`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.exp.f32(float %Val) + declare double @llvm.exp.f64(double %Val) + declare x86_fp80 @llvm.exp.f80(x86_fp80 %Val) + declare fp128 @llvm.exp.f128(fp128 %Val) + declare ppc_fp128 @llvm.exp.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.exp.*``' intrinsics perform the exp function. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``exp`` functions +would, and handles error conditions in the same way. + +'``llvm.exp2.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.exp2`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.exp2.f32(float %Val) + declare double @llvm.exp2.f64(double %Val) + declare x86_fp80 @llvm.exp2.f80(x86_fp80 %Val) + declare fp128 @llvm.exp2.f128(fp128 %Val) + declare ppc_fp128 @llvm.exp2.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.exp2.*``' intrinsics perform the exp2 function. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``exp2`` functions +would, and handles error conditions in the same way. + +'``llvm.log.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. 
You can use ``llvm.log`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.log.f32(float %Val) + declare double @llvm.log.f64(double %Val) + declare x86_fp80 @llvm.log.f80(x86_fp80 %Val) + declare fp128 @llvm.log.f128(fp128 %Val) + declare ppc_fp128 @llvm.log.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.log.*``' intrinsics perform the log function. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``log`` functions +would, and handles error conditions in the same way. + +'``llvm.log10.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.log10`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.log10.f32(float %Val) + declare double @llvm.log10.f64(double %Val) + declare x86_fp80 @llvm.log10.f80(x86_fp80 %Val) + declare fp128 @llvm.log10.f128(fp128 %Val) + declare ppc_fp128 @llvm.log10.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.log10.*``' intrinsics perform the log10 function. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``log10`` functions +would, and handles error conditions in the same way. + +'``llvm.log2.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.log2`` on any +floating point or vector of floating point type. Not all targets support +all types however. 
+ +:: + + declare float @llvm.log2.f32(float %Val) + declare double @llvm.log2.f64(double %Val) + declare x86_fp80 @llvm.log2.f80(x86_fp80 %Val) + declare fp128 @llvm.log2.f128(fp128 %Val) + declare ppc_fp128 @llvm.log2.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.log2.*``' intrinsics perform the log2 function. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``log2`` functions +would, and handles error conditions in the same way. + +'``llvm.fma.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.fma`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.fma.f32(float %a, float %b, float %c) + declare double @llvm.fma.f64(double %a, double %b, double %c) + declare x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) + declare fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c) + declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c) + +Overview: +""""""""" + +The '``llvm.fma.*``' intrinsics perform the fused multiply-add +operation. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``fma`` functions +would. + +'``llvm.fabs.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.fabs`` on any +floating point or vector of floating point type. Not all targets support +all types however. 
+ +:: + + declare float @llvm.fabs.f32(float %Val) + declare double @llvm.fabs.f64(double %Val) + declare x86_fp80 @llvm.fabs.f80(x86_fp80 %Val) + declare fp128 @llvm.fabs.f128(fp128 %Val) + declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.fabs.*``' intrinsics return the absolute value of the +operand. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``fabs`` functions +would, and handles error conditions in the same way. + +'``llvm.floor.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.floor`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.floor.f32(float %Val) + declare double @llvm.floor.f64(double %Val) + declare x86_fp80 @llvm.floor.f80(x86_fp80 %Val) + declare fp128 @llvm.floor.f128(fp128 %Val) + declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.floor.*``' intrinsics return the floor of the operand. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``floor`` functions +would, and handles error conditions in the same way. + +'``llvm.ceil.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.ceil`` on any +floating point or vector of floating point type. Not all targets support +all types however. 
+ +:: + + declare float @llvm.ceil.f32(float %Val) + declare double @llvm.ceil.f64(double %Val) + declare x86_fp80 @llvm.ceil.f80(x86_fp80 %Val) + declare fp128 @llvm.ceil.f128(fp128 %Val) + declare ppc_fp128 @llvm.ceil.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.ceil.*``' intrinsics return the ceiling of the operand. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``ceil`` functions +would, and handles error conditions in the same way. + +'``llvm.trunc.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.trunc`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.trunc.f32(float %Val) + declare double @llvm.trunc.f64(double %Val) + declare x86_fp80 @llvm.trunc.f80(x86_fp80 %Val) + declare fp128 @llvm.trunc.f128(fp128 %Val) + declare ppc_fp128 @llvm.trunc.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.trunc.*``' intrinsics return the operand rounded to the +nearest integer not larger in magnitude than the operand. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``trunc`` functions +would, and handles error conditions in the same way. + +'``llvm.rint.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.rint`` on any +floating point or vector of floating point type. Not all targets support +all types however. 
+ +:: + + declare float @llvm.rint.f32(float %Val) + declare double @llvm.rint.f64(double %Val) + declare x86_fp80 @llvm.rint.f80(x86_fp80 %Val) + declare fp128 @llvm.rint.f128(fp128 %Val) + declare ppc_fp128 @llvm.rint.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.rint.*``' intrinsics return the operand rounded to the +nearest integer. It may raise an inexact floating-point exception if the +operand isn't an integer. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``rint`` functions +would, and handles error conditions in the same way. + +'``llvm.nearbyint.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.nearbyint`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.nearbyint.f32(float %Val) + declare double @llvm.nearbyint.f64(double %Val) + declare x86_fp80 @llvm.nearbyint.f80(x86_fp80 %Val) + declare fp128 @llvm.nearbyint.f128(fp128 %Val) + declare ppc_fp128 @llvm.nearbyint.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.nearbyint.*``' intrinsics return the operand rounded to the +nearest integer. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``nearbyint`` +functions would, and handles error conditions in the same way. + +Bit Manipulation Intrinsics +--------------------------- + +LLVM provides intrinsics for a few important bit manipulation +operations. These allow efficient code generation for some algorithms. + +'``llvm.bswap.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic function. 
You can use bswap on any +integer type that is an even number of bytes (i.e. BitWidth % 16 == 0). + +:: + + declare i16 @llvm.bswap.i16(i16 ) + declare i32 @llvm.bswap.i32(i32 ) + declare i64 @llvm.bswap.i64(i64 ) + +Overview: +""""""""" + +The '``llvm.bswap``' family of intrinsics is used to byte swap integer +values with an even number of bytes (positive multiple of 16 bits). +These are useful for performing operations on data that is not in the +target's native byte order. + +Semantics: +"""""""""" + +The ``llvm.bswap.i16`` intrinsic returns an i16 value that has the high +and low byte of the input i16 swapped. Similarly, the ``llvm.bswap.i32`` +intrinsic returns an i32 value that has the four bytes of the input i32 +swapped, so that if the input bytes are numbered 0, 1, 2, 3 then the +returned i32 will have its bytes in 3, 2, 1, 0 order. The +``llvm.bswap.i48``, ``llvm.bswap.i64`` and other intrinsics extend this +concept to additional even-byte lengths (6 bytes, 8 bytes and more, +respectively). + +'``llvm.ctpop.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use llvm.ctpop on any integer +bit width, or on any vector with integer elements. Not all targets +support all bit widths or vector types, however. + +:: + + declare i8 @llvm.ctpop.i8(i8 ) + declare i16 @llvm.ctpop.i16(i16 ) + declare i32 @llvm.ctpop.i32(i32 ) + declare i64 @llvm.ctpop.i64(i64 ) + declare i256 @llvm.ctpop.i256(i256 ) + declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32> ) + +Overview: +""""""""" + +The '``llvm.ctpop``' family of intrinsics counts the number of bits set +in a value. + +Arguments: +"""""""""" + +The only argument is the value to be counted. The argument may be of any +integer type, or a vector with integer elements. The return type must +match the argument type. + +Semantics: +"""""""""" + +The '``llvm.ctpop``' intrinsic counts the 1's in a variable, or within +each element of a vector. 
+ +'``llvm.ctlz.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.ctlz`` on any +integer bit width, or any vector whose elements are integers. Not all +targets support all bit widths or vector types, however. + +:: + + declare i8 @llvm.ctlz.i8 (i8 <src>, i1 <is_zero_undef>) + declare i16 @llvm.ctlz.i16 (i16 <src>, i1 <is_zero_undef>) + declare i32 @llvm.ctlz.i32 (i32 <src>, i1 <is_zero_undef>) + declare i64 @llvm.ctlz.i64 (i64 <src>, i1 <is_zero_undef>) + declare i256 @llvm.ctlz.i256(i256 <src>, i1 <is_zero_undef>) + declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>) + +Overview: +""""""""" + +The '``llvm.ctlz``' family of intrinsic functions counts the number of +leading zeros in a variable. + +Arguments: +"""""""""" + +The first argument is the value to be counted. This argument may be of +any integer type, or a vector with integer element type. The return +type must match the first argument type. + +The second argument must be a constant and is a flag to indicate whether +the intrinsic should ensure that a zero as the first argument produces a +defined result. Historically some architectures did not provide a +defined result for zero values as efficiently, and many algorithms are +now predicated on avoiding zero-value inputs. + +Semantics: +"""""""""" + +The '``llvm.ctlz``' intrinsic counts the leading (most significant) +zeros in a variable, or within each element of the vector. If +``src == 0`` then the result is the size in bits of the type of ``src`` +if ``is_zero_undef == 0`` and ``undef`` otherwise. For example, +``llvm.ctlz(i32 2) = 30``. + +'``llvm.cttz.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.cttz`` on any +integer bit width, or any vector of integer elements. Not all targets +support all bit widths or vector types, however. 
+ +:: + + declare i8 @llvm.cttz.i8 (i8 <src>, i1 <is_zero_undef>) + declare i16 @llvm.cttz.i16 (i16 <src>, i1 <is_zero_undef>) + declare i32 @llvm.cttz.i32 (i32 <src>, i1 <is_zero_undef>) + declare i64 @llvm.cttz.i64 (i64 <src>, i1 <is_zero_undef>) + declare i256 @llvm.cttz.i256(i256 <src>, i1 <is_zero_undef>) + declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>) + +Overview: +""""""""" + +The '``llvm.cttz``' family of intrinsic functions counts the number of +trailing zeros. + +Arguments: +"""""""""" + +The first argument is the value to be counted. This argument may be of +any integer type, or a vector with integer element type. The return +type must match the first argument type. + +The second argument must be a constant and is a flag to indicate whether +the intrinsic should ensure that a zero as the first argument produces a +defined result. Historically some architectures did not provide a +defined result for zero values as efficiently, and many algorithms are +now predicated on avoiding zero-value inputs. + +Semantics: +"""""""""" + +The '``llvm.cttz``' intrinsic counts the trailing (least significant) +zeros in a variable, or within each element of a vector. If ``src == 0`` +then the result is the size in bits of the type of ``src`` if +``is_zero_undef == 0`` and ``undef`` otherwise. For example, +``llvm.cttz(2) = 1``. + +Arithmetic with Overflow Intrinsics +----------------------------------- + +LLVM provides intrinsics for some arithmetic with overflow operations. + +'``llvm.sadd.with.overflow.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.sadd.with.overflow`` +on any integer bit width. 
+ +:: + + declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b) + declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + +Overview: +""""""""" + +The '``llvm.sadd.with.overflow``' family of intrinsic functions perform +a signed addition of the two arguments, and indicate whether an overflow +occurred during the signed summation. + +Arguments: +"""""""""" + +The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +``i1``. ``%a`` and ``%b`` are the two values that will undergo signed +addition. + +Semantics: +"""""""""" + +The '``llvm.sadd.with.overflow``' family of intrinsic functions perform +a signed addition of the two variables. They return a structure --- the +first element of which is the signed summation, and the second element +of which is a bit specifying if the signed summation resulted in an +overflow. + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %sum = extractvalue {i32, i1} %res, 0 + %obit = extractvalue {i32, i1} %res, 1 + br i1 %obit, label %overflow, label %normal + +'``llvm.uadd.with.overflow.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.uadd.with.overflow`` +on any integer bit width. + +:: + + declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b) + declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + +Overview: +""""""""" + +The '``llvm.uadd.with.overflow``' family of intrinsic functions perform +an unsigned addition of the two arguments, and indicate whether a carry +occurred during the unsigned summation. 
+ +Arguments: +"""""""""" + +The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned +addition. + +Semantics: +"""""""""" + +The '``llvm.uadd.with.overflow``' family of intrinsic functions perform +an unsigned addition of the two arguments. They return a structure --- the +first element of which is the sum, and the second element of which is a +bit specifying if the unsigned summation resulted in a carry. + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %sum = extractvalue {i32, i1} %res, 0 + %obit = extractvalue {i32, i1} %res, 1 + br i1 %obit, label %carry, label %normal + +'``llvm.ssub.with.overflow.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.ssub.with.overflow`` +on any integer bit width. + +:: + + declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b) + declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + +Overview: +""""""""" + +The '``llvm.ssub.with.overflow``' family of intrinsic functions perform +a signed subtraction of the two arguments, and indicate whether an +overflow occurred during the signed subtraction. + +Arguments: +"""""""""" + +The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +``i1``. ``%a`` and ``%b`` are the two values that will undergo signed +subtraction. + +Semantics: +"""""""""" + +The '``llvm.ssub.with.overflow``' family of intrinsic functions perform +a signed subtraction of the two arguments. 
They return a structure --- the +first element of which is the subtraction, and the second element of +which is a bit specifying if the signed subtraction resulted in an +overflow. + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %sum = extractvalue {i32, i1} %res, 0 + %obit = extractvalue {i32, i1} %res, 1 + br i1 %obit, label %overflow, label %normal + +'``llvm.usub.with.overflow.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.usub.with.overflow`` +on any integer bit width. + +:: + + declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b) + declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + +Overview: +""""""""" + +The '``llvm.usub.with.overflow``' family of intrinsic functions perform +an unsigned subtraction of the two arguments, and indicate whether an +overflow occurred during the unsigned subtraction. + +Arguments: +"""""""""" + +The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned +subtraction. + +Semantics: +"""""""""" + +The '``llvm.usub.with.overflow``' family of intrinsic functions perform +an unsigned subtraction of the two arguments. They return a structure --- +the first element of which is the subtraction, and the second element of +which is a bit specifying if the unsigned subtraction resulted in an +overflow. + +Examples: +""""""""" + +.. 
code-block:: llvm + + %res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %sum = extractvalue {i32, i1} %res, 0 + %obit = extractvalue {i32, i1} %res, 1 + br i1 %obit, label %overflow, label %normal + +'``llvm.smul.with.overflow.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.smul.with.overflow`` +on any integer bit width. + +:: + + declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b) + declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b) + declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b) + +Overview: +""""""""" + +The '``llvm.smul.with.overflow``' family of intrinsic functions perform +a signed multiplication of the two arguments, and indicate whether an +overflow occurred during the signed multiplication. + +Arguments: +"""""""""" + +The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +``i1``. ``%a`` and ``%b`` are the two values that will undergo signed +multiplication. + +Semantics: +"""""""""" + +The '``llvm.smul.with.overflow``' family of intrinsic functions perform +a signed multiplication of the two arguments. They return a structure --- +the first element of which is the multiplication, and the second element +of which is a bit specifying if the signed multiplication resulted in an +overflow. + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b) + %sum = extractvalue {i32, i1} %res, 0 + %obit = extractvalue {i32, i1} %res, 1 + br i1 %obit, label %overflow, label %normal + +'``llvm.umul.with.overflow.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.umul.with.overflow`` +on any integer bit width. 
+ +:: + + declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b) + declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b) + declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + +Overview: +""""""""" + +The '``llvm.umul.with.overflow``' family of intrinsic functions perform +an unsigned multiplication of the two arguments, and indicate whether an +overflow occurred during the unsigned multiplication. + +Arguments: +"""""""""" + +The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned +multiplication. + +Semantics: +"""""""""" + +The '``llvm.umul.with.overflow``' family of intrinsic functions perform +an unsigned multiplication of the two arguments. They return a structure --- +the first element of which is the multiplication, and the second +element of which is a bit specifying if the unsigned multiplication +resulted in an overflow. + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b) + %sum = extractvalue {i32, i1} %res, 0 + %obit = extractvalue {i32, i1} %res, 1 + br i1 %obit, label %overflow, label %normal + +Specialised Arithmetic Intrinsics +--------------------------------- + +'``llvm.fmuladd.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare float @llvm.fmuladd.f32(float %a, float %b, float %c) + declare double @llvm.fmuladd.f64(double %a, double %b, double %c) + +Overview: +""""""""" + +The '``llvm.fmuladd.*``' intrinsic functions represent multiply-add +expressions that can be fused if the code generator determines that (a) the +target instruction set has support for a fused operation, and (b) that the +fused operation is more efficient than the equivalent, separate pair of mul +and add instructions. 
+ +Arguments: +"""""""""" + +The '``llvm.fmuladd.*``' intrinsics each take three arguments: two +multiplicands, a and b, and an addend c. + +Semantics: +"""""""""" + +The expression: + +:: + + %0 = call float @llvm.fmuladd.f32(%a, %b, %c) + +is equivalent to the expression a \* b + c, except that rounding will +not be performed between the multiplication and addition steps if the +code generator fuses the operations. Fusion is not guaranteed, even if +the target platform supports it. If a fused multiply-add is required the +corresponding llvm.fma.\* intrinsic function should be used instead. + +Examples: +""""""""" + +.. code-block:: llvm + + %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c + +Half Precision Floating Point Intrinsics +---------------------------------------- + +For most target platforms, half precision floating point is a +storage-only format. This means that it is a dense encoding (in memory) +but does not support computation in the format. + +This means that code must first load the half-precision floating point +value as an i16, then convert it to float with +:ref:`llvm.convert.from.fp16 <int_convert_from_fp16>`. Computation can +then be performed on the float value (including extending to double +etc). To store the value back to memory, it is first converted to float +if needed, then converted to i16 with +:ref:`llvm.convert.to.fp16 <int_convert_to_fp16>`, then stored as an +i16 value. + +.. _int_convert_to_fp16: + +'``llvm.convert.to.fp16``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i16 @llvm.convert.to.fp16(f32 %a) + +Overview: +""""""""" + +The '``llvm.convert.to.fp16``' intrinsic function performs a conversion +from single precision floating point format to half precision floating +point format. + +Arguments: +"""""""""" + +The intrinsic function takes a single argument - the value to be +converted. 
+ +Semantics: +"""""""""" + +The '``llvm.convert.to.fp16``' intrinsic function performs a conversion +from single precision floating point format to half precision floating +point format. The return value is an ``i16`` which contains the +converted number. + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call i16 @llvm.convert.to.fp16(f32 %a) + store i16 %res, i16* @x, align 2 + +.. _int_convert_from_fp16: + +'``llvm.convert.from.fp16``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare f32 @llvm.convert.from.fp16(i16 %a) + +Overview: +""""""""" + +The '``llvm.convert.from.fp16``' intrinsic function performs a +conversion from half precision floating point format to single precision +floating point format. + +Arguments: +"""""""""" + +The intrinsic function takes a single argument - the value to be +converted. + +Semantics: +"""""""""" + +The '``llvm.convert.from.fp16``' intrinsic function performs a +conversion from half precision floating point format to single +precision floating point format. The input half-float value is +represented by an ``i16`` value. + +Examples: +""""""""" + +.. code-block:: llvm + + %a = load i16* @x, align 2 + %res = call f32 @llvm.convert.from.fp16(i16 %a) + +Debugger Intrinsics +------------------- + +The LLVM debugger intrinsics (which all start with ``llvm.dbg.`` +prefix), are described in the `LLVM Source Level +Debugging <SourceLevelDebugging.html>`_ +document. + +Exception Handling Intrinsics +----------------------------- + +The LLVM exception handling intrinsics (which all start with +``llvm.eh.`` prefix), are described in the `LLVM Exception +Handling <ExceptionHandling.html>`_ document. + +.. _int_trampoline: + +Trampoline Intrinsics +--------------------- + +These intrinsics make it possible to excise one parameter, marked with +the :ref:`nest <nest>` attribute, from a function. The result is a +callable function pointer lacking the nest parameter - the caller does +not need to provide a value for it. 
Instead, the value to use is stored +in advance in a "trampoline", a block of memory usually allocated on the +stack, which also contains code to splice the nest value into the +argument list. This is used to implement the GCC nested function address +extension. + +For example, if the function is ``i32 f(i8* nest %c, i32 %x, i32 %y)`` +then the resulting function pointer has signature ``i32 (i32, i32)*``. +It can be created as follows: + +.. code-block:: llvm + + %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86 + %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0 + call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval) + %p = call i8* @llvm.adjust.trampoline(i8* %tramp1) + %fp = bitcast i8* %p to i32 (i32, i32)* + +The call ``%val = call i32 %fp(i32 %x, i32 %y)`` is then equivalent to +``%val = call i32 %f(i8* %nval, i32 %x, i32 %y)``. + +.. _int_it: + +'``llvm.init.trampoline``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>) + +Overview: +""""""""" + +This fills the memory pointed to by ``tramp`` with executable code, +turning it into a trampoline. + +Arguments: +"""""""""" + +The ``llvm.init.trampoline`` intrinsic takes three arguments, all +pointers. The ``tramp`` argument must point to a sufficiently large and +sufficiently aligned block of memory; this memory is written to by the +intrinsic. Note that the size and the alignment are target-specific - +LLVM currently provides no portable way of determining them, so a +front-end that generates this intrinsic needs to have some +target-specific knowledge. The ``func`` argument must hold a function +bitcast to an ``i8*``. + +Semantics: +"""""""""" + +The block of memory pointed to by ``tramp`` is filled with target +dependent code, turning it into a function. 
Then ``tramp`` needs to be +passed to :ref:`llvm.adjust.trampoline <int_at>` to get a pointer which can +be :ref:`bitcast (to a new function) and called <int_trampoline>`. The new +function's signature is the same as that of ``func`` with any arguments +marked with the ``nest`` attribute removed. At most one such ``nest`` +argument is allowed, and it must be of pointer type. Calling the new +function is equivalent to calling ``func`` with the same argument list, +but with ``nval`` used for the missing ``nest`` argument. If, after +calling ``llvm.init.trampoline``, the memory pointed to by ``tramp`` is +modified, then the effect of any later call to the returned function +pointer is undefined. + +.. _int_at: + +'``llvm.adjust.trampoline``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.adjust.trampoline(i8* <tramp>) + +Overview: +""""""""" + +This performs any required machine-specific adjustment to the address of +a trampoline (passed as ``tramp``). + +Arguments: +"""""""""" + +``tramp`` must point to a block of memory which already has trampoline +code filled in by a previous call to +:ref:`llvm.init.trampoline <int_it>`. + +Semantics: +"""""""""" + +On some architectures the address of the code to be executed needs to be +different to the address where the trampoline is actually stored. This +intrinsic returns the executable address corresponding to ``tramp`` +after performing the required machine specific adjustments. The pointer +returned can then be :ref:`bitcast and executed <int_trampoline>`. + +Memory Use Markers +------------------ + +This class of intrinsics exists to provide information about the lifetime of +memory objects and ranges where variables are immutable. + +'``llvm.lifetime.start``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.lifetime.start(i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.lifetime.start``' intrinsic specifies the start of a memory +object's lifetime. 
+ +Arguments: +"""""""""" + +The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that before this point in the code, the value +of the memory pointed to by ``ptr`` is dead. This means that it is known +to never be used and has an undefined value. A load from the pointer +that precedes this intrinsic can be replaced with ``'undef'``. + +'``llvm.lifetime.end``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.lifetime.end(i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.lifetime.end``' intrinsic specifies the end of a memory +object's lifetime. + +Arguments: +"""""""""" + +The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that after this point in the code, the value of +the memory pointed to by ``ptr`` is dead. This means that it is known to +never be used and has an undefined value. Any stores into the memory +object following this intrinsic may be removed as dead. + +'``llvm.invariant.start``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare {}* @llvm.invariant.start(i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.invariant.start``' intrinsic specifies that the contents of +a memory object will not change. + +Arguments: +"""""""""" + +The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that until an ``llvm.invariant.end`` that uses +the return value, the referenced memory location is constant and +unchanging. 
+ +'``llvm.invariant.end``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.invariant.end({}* , i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.invariant.end``' intrinsic specifies that the contents of a +memory object are mutable. + +Arguments: +"""""""""" + +The first argument is the matching ``llvm.invariant.start`` intrinsic. +The second argument is a constant integer representing the size of the +object, or -1 if it is variable sized and the third argument is a +pointer to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that the memory is mutable again. + +General Intrinsics +------------------ + +This class of intrinsics is designed to be generic and has no specific +purpose. + +'``llvm.var.annotation``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.var.annotation(i8* , i8* , i8* , i32 ) + +Overview: +""""""""" + +The '``llvm.var.annotation``' intrinsic. + +Arguments: +"""""""""" + +The first argument is a pointer to a value, the second is a pointer to a +global string, the third is a pointer to a global string which is the +source file name, and the last argument is the line number. + +Semantics: +"""""""""" + +This intrinsic allows annotation of local variables with arbitrary +strings. This can be useful for special purpose optimizations that want +to look for these annotations. These have no other defined use; they are +ignored by code generation and optimization. + +'``llvm.ptr.annotation.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use '``llvm.ptr.annotation``' on a +pointer to an integer of any width. *NOTE* you must specify an address space for +the pointer. The identifier for the default address space is the integer +'``0``'. + +:: + + declare i8* @llvm.ptr.annotation.p
<address space>i8(i8* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i16* @llvm.ptr.annotation.p<address space>i16(i16* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i32* @llvm.ptr.annotation.p<address space>i32(i32* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i64* @llvm.ptr.annotation.p<address space>i64(i64* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i256* @llvm.ptr.annotation.p<address space>i256(i256* <val>, i8* <str>, i8* <str>, i32 <int>) + +Overview: +""""""""" + +The '``llvm.ptr.annotation``' intrinsic. + +Arguments: +"""""""""" + +The first argument is a pointer to an integer value of arbitrary bitwidth +(result of some expression), the second is a pointer to a global string, the +third is a pointer to a global string which is the source file name, and the +last argument is the line number. It returns the value of the first argument. + +Semantics: +"""""""""" + +This intrinsic allows annotation of a pointer to an integer with arbitrary +strings. This can be useful for special purpose optimizations that want to look +for these annotations. These have no other defined use; they are ignored by code +generation and optimization. + +'``llvm.annotation.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use '``llvm.annotation``' on +any integer bit width. + +:: + + declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32 <int>) + declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32 <int>) + declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32 <int>) + declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32 <int>) + declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32 <int>) + +Overview: +""""""""" + +The '``llvm.annotation``' intrinsic. + +Arguments: +"""""""""" + +The first argument is an integer value (result of some expression), the +second is a pointer to a global string, the third is a pointer to a +global string which is the source file name, and the last argument is +the line number. It returns the value of the first argument. + +Semantics: +"""""""""" + +This intrinsic allows annotations to be put on arbitrary expressions +with arbitrary strings. This can be useful for special purpose +optimizations that want to look for these annotations. These have no +other defined use; they are ignored by code generation and optimization. 
+ +'``llvm.trap``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.trap() noreturn nounwind + +Overview: +""""""""" + +The '``llvm.trap``' intrinsic. + +Arguments: +"""""""""" + +None. + +Semantics: +"""""""""" + +This intrinsic is lowered to the target dependent trap instruction. If +the target does not have a trap instruction, this intrinsic will be +lowered to a call of the ``abort()`` function. + +'``llvm.debugtrap``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.debugtrap() nounwind + +Overview: +""""""""" + +The '``llvm.debugtrap``' intrinsic. + +Arguments: +"""""""""" + +None. + +Semantics: +"""""""""" + +This intrinsic is lowered to code which is intended to cause an +execution trap with the intention of requesting the attention of a +debugger. + +'``llvm.stackprotector``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.stackprotector(i8* , i8** ) + +Overview: +""""""""" + +The ``llvm.stackprotector`` intrinsic takes the ``guard`` and stores it +onto the stack at ``slot``. The stack slot is adjusted to ensure that it +is placed on the stack before local variables. + +Arguments: +"""""""""" + +The ``llvm.stackprotector`` intrinsic requires two pointer arguments. +The first argument is the value loaded from the stack guard +``@__stack_chk_guard``. The second variable is an ``alloca`` that has +enough space to hold the value of the guard. + +Semantics: +"""""""""" + +This intrinsic causes the prologue/epilogue inserter to force the +position of the ``AllocaInst`` stack slot to be before local variables +on the stack. This is to ensure that if a local variable on the stack is +overwritten, it will destroy the value of the guard. When the function +exits, the guard on the stack is checked against the original guard. If +they are different, then the program aborts by calling the +``__stack_chk_fail()`` function. 
+ +'``llvm.objectsize``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i32 @llvm.objectsize.i32(i8* , i1 ) + declare i64 @llvm.objectsize.i64(i8* , i1 ) + +Overview: +""""""""" + +The ``llvm.objectsize`` intrinsic is designed to provide information to +the optimizers to determine at compile time whether a) an operation +(like memcpy) will overflow a buffer that corresponds to an object, or +b) that a runtime check for overflow isn't necessary. An object in this +context means an allocation of a specific class, structure, array, or +other object. + +Arguments: +"""""""""" + +The ``llvm.objectsize`` intrinsic takes two arguments. The first +argument is a pointer to or into the ``object``. The second argument is +a boolean and determines whether ``llvm.objectsize`` returns 0 (if true) +or -1 (if false) when the object size is unknown. The second argument +only accepts constants. + +Semantics: +"""""""""" + +The ``llvm.objectsize`` intrinsic is lowered to a constant representing +the size of the object concerned. If the size cannot be determined at +compile time, ``llvm.objectsize`` returns ``i32/i64 -1 or 0`` (depending +on the ``min`` argument). + +'``llvm.expect``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i32 @llvm.expect.i32(i32 , i32 ) + declare i64 @llvm.expect.i64(i64 , i64 ) + +Overview: +""""""""" + +The ``llvm.expect`` intrinsic provides information about expected (the +most probable) value of ``val``, which can be used by optimizers. + +Arguments: +"""""""""" + +The ``llvm.expect`` intrinsic takes two arguments. The first argument is +a value. The second argument is an expected value, this needs to be a +constant value, variables are not allowed. + +Semantics: +"""""""""" + +This intrinsic is lowered to the ``val``. 
+ +'``llvm.donothing``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.donothing() nounwind readnone + +Overview: +""""""""" + +The ``llvm.donothing`` intrinsic doesn't perform any operation. It's the +only intrinsic that can be called with an invoke instruction. + +Arguments: +"""""""""" + +None. + +Semantics: +"""""""""" + +This intrinsic does nothing, and it's removed by optimizers and ignored +by codegen. diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst index d568c0b302ec..11f1341f5cbd 100644 --- a/docs/Lexicon.rst +++ b/docs/Lexicon.rst @@ -1,5 +1,3 @@ -.. _lexicon: - ================ The LLVM Lexicon ================ @@ -17,11 +15,28 @@ A **ADCE** Aggressive Dead Code Elimination +**AST** + Abstract Syntax Tree. + + Due to Clang's influence (mostly the fact that parsing and semantic + analysis are so intertwined for C and especially C++), the typical + working definition of AST in the LLVM community is roughly "the + compiler's first complete symbolic (as opposed to textual) + representation of an input program". + As such, an "AST" might be a more general graph instead of a "tree" + (consider the symbolic representation for the type of a typical "linked + list node"). This working definition is closer to what some authors + call an "annotated abstract syntax tree". + + Consult your favorite compiler book or search engine for more details. + B - +.. _lexicon-bb-vectorization: + **BB Vectorization** - Basic Block Vectorization + Basic-Block Vectorization **BURS** Bottom Up Rewriting System --- A method of instruction selection for code @@ -185,6 +200,10 @@ S **SCCP** Sparse Conditional Constant Propagation +**SLP** + Superword-Level Parallelism, same as :ref:`Basic-Block Vectorization + <lexicon-bb-vectorization>`. 
+ **SRoA** Scalar Replacement of Aggregates diff --git a/docs/LinkTimeOptimization.rst b/docs/LinkTimeOptimization.rst index 7eacf0bd0d01..c15abd325ed0 100644 --- a/docs/LinkTimeOptimization.rst +++ b/docs/LinkTimeOptimization.rst @@ -1,5 +1,3 @@ -.. _lto: - ====================================================== LLVM Link Time Optimization: Design and Implementation ====================================================== @@ -85,9 +83,10 @@ invokes system linker. return foo1(); } -.. code-block:: bash +To compile, run: + +.. code-block:: console - --- command lines --- % clang -emit-llvm -c a.c -o a.o # <-- a.o is LLVM bitcode file % clang -c main.c -o main.o # <-- main.o is native object file % clang a.o main.o -o main # <-- standard link command without modifications @@ -96,7 +95,7 @@ invokes system linker. visible symbol defined in LLVM bitcode file. The linker completes its usual symbol resolution pass and finds that ``foo2()`` is not used anywhere. This information is used by the LLVM optimizer and it - removes ``foo2()``. + removes ``foo2()``. * As soon as ``foo2()`` is removed, the optimizer recognizes that condition ``i < 0`` is always false, which means ``foo3()`` is never used. Hence, the diff --git a/docs/Makefile.sphinx b/docs/Makefile.sphinx index 81c13de9cd9e..21f66488b2b7 100644 --- a/docs/Makefile.sphinx +++ b/docs/Makefile.sphinx @@ -46,10 +46,6 @@ clean: html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo - @# FIXME: Remove this `cp` once HTML->Sphinx transition is completed. - @# Kind of a hack, but HTML-formatted docs are on the way out anyway. - @echo "Copying legacy HTML-formatted docs into $(BUILDDIR)/html" - @cp -a *.html tutorial $(BUILDDIR)/html @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: diff --git a/docs/MakefileGuide.rst b/docs/MakefileGuide.rst index d2bdd24a9e7f..3e9090788654 100644 --- a/docs/MakefileGuide.rst +++ b/docs/MakefileGuide.rst @@ -1,5 +1,3 @@ -.. 
_makefile_guide: - =================== LLVM Makefile Guide =================== @@ -60,7 +58,7 @@ To use the makefile system, you simply create a file named ``Makefile`` in your directory and declare values for certain variables. The variables and values that you select determine what the makefile system will do. These variables enable rules and processing in the makefile system that automatically Do The -Right Thing™. +Right Thing (C). Including Makefiles ------------------- @@ -170,9 +168,9 @@ openable with the ``dlopen`` function and searchable with the ``dlsym`` function (or your operating system's equivalents). While this isn't strictly necessary on Linux and a few other platforms, it is required on systems like HP-UX and Darwin. You should use ``LOADABLE_MODULE`` for any shared library that you -intend to be loaded into an tool via the ``-load`` option. See the -`WritingAnLLVMPass.html `_ document for an -example of why you might want to do this. +intend to be loaded into a tool via the ``-load`` option. `Pass documentation +<WritingAnLLVMPass.html>`_ has an example of why you might want to do +this. Bitcode Modules ^^^^^^^^^^^^^^^ @@ -241,7 +239,7 @@ and the names of the libraries you wish to link with the tool. For example: says that we are to build a tool name ``mytool`` and that it requires three libraries: ``mylib``, ``LLVMSupport.a`` and ``LLVMSystem.a``. -Note that two different variables are use to indicate which libraries are +Note that two different variables are used to indicate which libraries are linked: ``USEDLIBS`` and ``LLVMLIBS``. This distinction is necessary to support projects. ``LLVMLIBS`` refers to the LLVM libraries found in the LLVM object directory. ``USEDLIBS`` refers to the libraries built by your project. In the @@ -339,7 +337,7 @@ the invocation of ``make check-local`` in the ``test`` directory. The intended usage for this is to assist in running specific suites of tests. 
If ``TESTSUITE`` is not set, the implementation of ``check-local`` should run all normal tests. It is up to the project to define what different values for -``TESTSUTE`` will do. See the `Testing Guide `_ for further +``TESTSUITE`` will do. See the :doc:`Testing Guide <TestingGuide>` for further details. ``check-local`` @@ -348,9 +346,9 @@ details. This target should be implemented by the ``Makefile`` in the project's ``test`` directory. It is invoked by the ``check`` target elsewhere. Each project is free to define the actions of ``check-local`` as appropriate for that -project. The LLVM project itself uses dejagnu to run a suite of feature and -regresson tests. Other projects may choose to use dejagnu or any other testing -mechanism. +project. The LLVM project itself uses the :doc:`Lit <CommandGuide/lit>` testing +tool to run a suite of feature and regression tests. Other projects may choose +to use :program:`lit` or any other testing mechanism. ``clean`` --------- @@ -358,7 +356,7 @@ mechanism. This target cleans the build directory, recursively removing all things that the Makefile builds. The cleaning rules have been made guarded so they shouldn't go awry (via ``rm -f $(UNSET_VARIABLE)/*`` which will attempt to erase the entire -directory structure. +directory structure). ``clean-local`` --------------- @@ -606,8 +604,8 @@ system that tell it what to do for the current directory. the build process, such as code generators (e.g. ``tblgen``). ``OPTIONAL_DIRS`` - Specify a set of directories that may be built, if they exist, but its not - an error for them not to exist. + Specify a set of directories that may be built, if they exist, but it is + not an error for them not to exist. ``PARALLEL_DIRS`` Specify a set of directories to build recursively and in parallel if the @@ -701,6 +699,9 @@ The override variables are given below: ``CFLAGS`` Additional flags to be passed to the 'C' compiler. +``CPPFLAGS`` + Additional flags passed to the C/C++ preprocessor. 
+ ``CXX`` Specifies the path to the C++ compiler. diff --git a/docs/MarkedUpDisassembly.rst b/docs/MarkedUpDisassembly.rst index e1282e102ebe..cc4dbc817e08 100644 --- a/docs/MarkedUpDisassembly.rst +++ b/docs/MarkedUpDisassembly.rst @@ -1,5 +1,3 @@ -.. _marked_up_disassembly: - ======================================= LLVM's Optional Rich Disassembly Output ======================================= diff --git a/docs/NVPTXUsage.rst b/docs/NVPTXUsage.rst new file mode 100644 index 000000000000..5451619686d9 --- /dev/null +++ b/docs/NVPTXUsage.rst @@ -0,0 +1,276 @@ +============================= +User Guide for NVPTX Back-end +============================= + +.. contents:: + :local: + :depth: 3 + + +Introduction +============ + +To support GPU programming, the NVPTX back-end supports a subset of LLVM IR +along with a defined set of conventions used to represent GPU programming +concepts. This document provides an overview of the general usage of the back- +end, including a description of the conventions used and the set of accepted +LLVM IR. + +.. note:: + + This document assumes a basic familiarity with CUDA and the PTX + assembly language. Information about the CUDA Driver API and the PTX assembly + language can be found in the `CUDA documentation + `_. + + + +Conventions +=========== + +Marking Functions as Kernels +---------------------------- + +In PTX, there are two types of functions: *device functions*, which are only +callable by device code, and *kernel functions*, which are callable by host +code. By default, the back-end will emit device functions. Metadata is used to +declare a function as a kernel function. This metadata is attached to the +``nvvm.annotations`` named metadata object, and has the following format: + +.. code-block:: llvm + + !0 = metadata !{, metadata !"kernel", i32 1} + +The first parameter is a reference to the kernel function. The following +example shows a kernel function calling a device function in LLVM IR. 
The +function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not. + +.. code-block:: llvm + + define float @my_fmad(float %x, float %y, float %z) { + %mul = fmul float %x, %y + %add = fadd float %mul, %z + ret float %add + } + + define void @my_kernel(float* %ptr) { + %val = load float* %ptr + %ret = call float @my_fmad(float %val, float %val, float %val) + store float %ret, float* %ptr + ret void + } + + !nvvm.annotations = !{!1} + !1 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1} + +When compiled, the PTX kernel functions are callable by host-side code. + + +Address Spaces +-------------- + +The NVPTX back-end uses the following address space mapping: + + ============= ====================== + Address Space Memory Space + ============= ====================== + 0 Generic + 1 Global + 2 Internal Use + 3 Shared + 4 Constant + 5 Local + ============= ====================== + +Every global variable and pointer type is assigned to one of these address +spaces, with 0 being the default address space. Intrinsics are provided which +can be used to convert pointers between the generic and non-generic address +spaces. + +As an example, the following IR will define an array ``@g`` that resides in +global device memory. + +.. code-block:: llvm + + @g = internal addrspace(1) global [4 x i32] [ i32 0, i32 1, i32 2, i32 3 ] + +LLVM IR functions can read and write to this array, and host-side code can +copy data to it by name with the CUDA Driver API. + +Note that since address space 0 is the generic space, it is illegal to have +global variables in address space 0. Address space 0 is the default address +space in LLVM, so the ``addrspace(N)`` annotation is *required* for global +variables. + + +NVPTX Intrinsics +================ + +Address Space Conversion +------------------------ + +'``llvm.nvvm.ptr.*.to.gen``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. 
You can use these on any pointer types. + +.. code-block:: llvm + + declare i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)*) + declare i8* @llvm.nvvm.ptr.shared.to.gen.p0i8.p3i8(i8 addrspace(3)*) + declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*) + declare i8* @llvm.nvvm.ptr.local.to.gen.p0i8.p5i8(i8 addrspace(5)*) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic +address space to a generic address space pointer. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid generic address space +pointer. + + +'``llvm.nvvm.ptr.gen.to.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. You can use these on any pointer types. + +.. code-block:: llvm + + declare i8 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8*) + declare i8 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i8.p0i8(i8*) + declare i8 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i8.p0i8(i8*) + declare i8 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i8.p0i8(i8*) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic +address space to a pointer in the target address space. Note that these +intrinsics are only useful if the target address space of +the pointer is known. It is not legal to use address space conversion +intrinsics to convert a pointer from one non-generic address space to another +non-generic address space. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid pointer in the target +non-generic address space. + + +Reading PTX Special Registers +----------------------------- + +'``llvm.nvvm.read.ptx.sreg.*``' +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. 
code-block:: llvm + + declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() + +Overview: +""""""""" + +The '``@llvm.nvvm.read.ptx.sreg.*``' intrinsics provide access to the PTX +special registers, in particular the kernel launch bounds. These registers +map in the following way to CUDA builtins: + + ============= ===================================== + CUDA Builtin PTX Special Register Intrinsic + ============= ===================================== + ``threadIdx`` ``@llvm.nvvm.read.ptx.sreg.tid.*`` + ``blockIdx`` ``@llvm.nvvm.read.ptx.sreg.ctaid.*`` + ``blockDim`` ``@llvm.nvvm.read.ptx.sreg.ntid.*`` + ``gridDim`` ``@llvm.nvvm.read.ptx.sreg.nctaid.*`` + ============= ===================================== + + +Barriers +-------- + +'``llvm.nvvm.barrier0``' +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. code-block:: llvm + + declare void @llvm.nvvm.barrier0() + +Overview: +""""""""" + +The '``@llvm.nvvm.barrier0()``' intrinsic emits a PTX ``bar.sync 0`` +instruction, equivalent to the ``__syncthreads()`` call in CUDA. + + +Other Intrinsics +---------------- + +For the full set of NVPTX intrinsics, please see the +``include/llvm/IR/IntrinsicsNVVM.td`` file in the LLVM source tree. + + +Executing PTX +============= + +The most common way to execute PTX assembly on a GPU device is to use the CUDA +Driver API. 
This API is a low-level interface to the GPU driver and allows for +JIT compilation of PTX code to native GPU machine code. + +Initializing the Driver API: + +.. code-block:: c++ + + CUdevice device; + CUcontext context; + + // Initialize the driver API + cuInit(0); + // Get a handle to the first compute device + cuDeviceGet(&device, 0); + // Create a compute device context + cuCtxCreate(&context, 0, device); + +JIT compiling a PTX string to a device binary: + +.. code-block:: c++ + + CUmodule module; + CUfunction function; + + // JIT compile a null-terminated PTX string + cuModuleLoadData(&module, (void*)PTXString); + + // Get a handle to the "myfunction" kernel function + cuModuleGetFunction(&function, module, "myfunction"); + +For full examples of executing PTX assembly, please see the `CUDA Samples +`_ distribution. diff --git a/docs/Packaging.rst b/docs/Packaging.rst index 6e74158d7213..7c2dc956128e 100644 --- a/docs/Packaging.rst +++ b/docs/Packaging.rst @@ -1,5 +1,3 @@ -.. _packaging: - ======================== Advice on Packaging LLVM ======================== diff --git a/docs/Passes.html b/docs/Passes.html deleted file mode 100644 index 16e8bd6f6b13..000000000000 --- a/docs/Passes.html +++ /dev/null @@ -1,2049 +0,0 @@ - - - - LLVM's Analysis and Transform Passes - - - - - - - -

LLVM's Analysis and Transform Passes

- -
    -
  1. Introduction
  2. -
  3. Analysis Passes -
  4. Transform Passes
  5. -
  6. Utility Passes
  7. -
- -
-

Written by Reid Spencer - and Gordon Henriksen

-
- - -

Introduction

-
-

This document serves as a high level summary of the optimization features - that LLVM provides. Optimizations are implemented as Passes that traverse some - portion of a program to either collect information or transform the program. - The table below divides the passes that LLVM provides into three categories. - Analysis passes compute information that other passes can use or for debugging - or program visualization purposes. Transform passes can use (or invalidate) - the analysis passes. Transform passes all mutate the program in some way. - Utility passes provides some utility but don't otherwise fit categorization. - For example passes to extract functions to bitcode or write a module to - bitcode are neither analysis nor transform passes. -

The table below provides a quick summary of each pass and links to the more - complete pass description later in the document.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ANALYSIS PASSES
OptionName
-aa-evalExhaustive Alias Analysis Precision Evaluator
-basicaaBasic Alias Analysis (stateless AA impl)
-basiccgBasic CallGraph Construction
-count-aaCount Alias Analysis Query Responses
-daDependence Analysis
-debug-aaAA use debugger
-domfrontierDominance Frontier Construction
-domtreeDominator Tree Construction
-dot-callgraphPrint Call Graph to 'dot' file
-dot-cfgPrint CFG of function to 'dot' file
-dot-cfg-onlyPrint CFG of function to 'dot' file (with no function bodies)
-dot-domPrint dominance tree of function to 'dot' file
-dot-dom-onlyPrint dominance tree of function to 'dot' file (with no function bodies)
-dot-postdomPrint postdominance tree of function to 'dot' file
-dot-postdom-onlyPrint postdominance tree of function to 'dot' file (with no function bodies)
-globalsmodref-aaSimple mod/ref analysis for globals
-instcountCounts the various types of Instructions
-intervalsInterval Partition Construction
-iv-usersInduction Variable Users
-lazy-value-infoLazy Value Information Analysis
-libcall-aaLibCall Alias Analysis
-lintStatically lint-checks LLVM IR
-loopsNatural Loop Information
-memdepMemory Dependence Analysis
-module-debuginfoDecodes module-level debug info
-no-aaNo Alias Analysis (always returns 'may' alias)
-no-profileNo Profile Information
-postdomtreePost-Dominator Tree Construction
-print-alias-setsAlias Set Printer
-print-callgraphPrint a call graph
-print-callgraph-sccsPrint SCCs of the Call Graph
-print-cfg-sccsPrint SCCs of each function CFG
-print-dbginfoPrint debug info in human readable form
-print-dom-infoDominator Info Printer
-print-externalfnconstantsPrint external fn callsites passed constants
-print-functionPrint function to stderr
-print-modulePrint module to stderr
-print-used-typesFind Used Types
-profile-estimatorEstimate profiling information
-profile-loaderLoad profile information from llvmprof.out
-profile-verifierVerify profiling information
-regionsDetect single entry single exit regions
-scalar-evolutionScalar Evolution Analysis
-scev-aaScalarEvolution-based Alias Analysis
-targetdataTarget Data Layout
TRANSFORM PASSES
OptionName
-adceAggressive Dead Code Elimination
-always-inlineInliner for always_inline functions
-argpromotionPromote 'by reference' arguments to scalars
-bb-vectorizeCombine instructions to form vector instructions within basic blocks
-block-placementProfile Guided Basic Block Placement
-break-crit-edgesBreak critical edges in CFG
-codegenprepareOptimize for code generation
-constmergeMerge Duplicate Global Constants
-constpropSimple constant propagation
-dceDead Code Elimination
-deadargelimDead Argument Elimination
-deadtypeelimDead Type Elimination
-dieDead Instruction Elimination
-dseDead Store Elimination
-functionattrsDeduce function attributes
-globaldceDead Global Elimination
-globaloptGlobal Variable Optimizer
-gvnGlobal Value Numbering
-indvarsCanonicalize Induction Variables
-inlineFunction Integration/Inlining
-insert-edge-profilingInsert instrumentation for edge profiling
-insert-optimal-edge-profilingInsert optimal instrumentation for edge profiling
-instcombineCombine redundant instructions
-internalizeInternalize Global Symbols
-ipconstpropInterprocedural constant propagation
-ipsccpInterprocedural Sparse Conditional Constant Propagation
-jump-threadingJump Threading
-lcssaLoop-Closed SSA Form Pass
-licmLoop Invariant Code Motion
-loop-deletionDelete dead loops
-loop-extractExtract loops into new functions
-loop-extract-singleExtract at most one loop into a new function
-loop-reduceLoop Strength Reduction
-loop-rotateRotate Loops
-loop-simplifyCanonicalize natural loops
-loop-unrollUnroll loops
-loop-unswitchUnswitch loops
-loweratomicLower atomic intrinsics to non-atomic form
-lowerinvokeLower invoke and unwind, for unwindless code generators
-lowerswitchLower SwitchInst's to branches
-mem2regPromote Memory to Register
-memcpyoptMemCpy Optimization
-mergefuncMerge Functions
-mergereturnUnify function exit nodes
-partial-inlinerPartial Inliner
-prune-ehRemove unused exception handling info
-reassociateReassociate expressions
-reg2memDemote all values to stack slots
-scalarreplScalar Replacement of Aggregates (DT)
-sccpSparse Conditional Constant Propagation
-simplify-libcallsSimplify well-known library calls
-simplifycfgSimplify the CFG
-sinkCode sinking
-sretpromotionPromote sret arguments to multiple ret values
-stripStrip all symbols from a module
-strip-dead-debug-infoStrip debug info for unused symbols
-strip-dead-prototypesStrip Unused Function Prototypes
-strip-debug-declareStrip all llvm.dbg.declare intrinsics
-strip-nondebugStrip all symbols, except dbg symbols, from a module
-tailcallelimTail Call Elimination
UTILITY PASSES
OptionName
-deadarghaX0rDead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)
-extract-blocksExtract Basic Blocks From Module (for bugpoint use)
-instnamerAssign names to anonymous instructions
-preverifyPreliminary module verification
-verifyModule Verifier
-view-cfgView CFG of function
-view-cfg-onlyView CFG of function (with no function bodies)
-view-domView dominance tree of function
-view-dom-onlyView dominance tree of function (with no function bodies)
-view-postdomView postdominance tree of function
-view-postdom-onlyView postdominance tree of function (with no function bodies)
- -
- - -

Analysis Passes

-
-

This section describes the LLVM Analysis Passes.

- - -

- -aa-eval: Exhaustive Alias Analysis Precision Evaluator -

-
-

This is a simple N^2 alias analysis accuracy evaluator. - Basically, for each function in the program, it simply queries to see how the - alias analysis implementation answers alias queries between each pair of - pointers in the function.

- -

This is inspired and adapted from code by: Naveen Neelakantam, Francesco - Spadini, and Wojciech Stryjewski.

-
- - -

- -basicaa: Basic Alias Analysis (stateless AA impl) -

-
-

A basic alias analysis pass that implements identities (two different - globals cannot alias, etc), but does no stateful analysis.

-
- - -

- -basiccg: Basic CallGraph Construction -

-
-

Yet to be written.

-
- - -

- -count-aa: Count Alias Analysis Query Responses -

-
-

- A pass which can be used to count how many alias queries - are being made and how the alias analysis implementation being used responds. -

-
- - -

- -da: Dependence Analysis -

-
-

Dependence analysis framework, which is used to detect dependences in - memory accesses.

-
- - -

- -debug-aa: AA use debugger -

-
-

- This simple pass checks alias analysis users to ensure that if they - create a new value, they do not query AA without informing it of the value. - It acts as a shim over any other AA pass you want. -

- -

- Yes keeping track of every value in the program is expensive, but this is - a debugging pass. -

-
- - -

- -domfrontier: Dominance Frontier Construction -

-
-

- This pass is a simple dominator construction algorithm for finding forward - dominator frontiers. -

-
- - -

- -domtree: Dominator Tree Construction -

-
-

- This pass is a simple dominator construction algorithm for finding forward - dominators. -

-
- - -

- -dot-callgraph: Print Call Graph to 'dot' file -

-
-

- This pass, only available in opt, prints the call graph into a - .dot graph. This graph can then be processed with the "dot" tool - to convert it to postscript or some other suitable format. -

-
- - -

- -dot-cfg: Print CFG of function to 'dot' file -

-
-

- This pass, only available in opt, prints the control flow graph - into a .dot graph. This graph can then be processed with the - "dot" tool to convert it to postscript or some other suitable format. -

-
- - -

- -dot-cfg-only: Print CFG of function to 'dot' file (with no function bodies) -

-
-

- This pass, only available in opt, prints the control flow graph - into a .dot graph, omitting the function bodies. This graph can - then be processed with the "dot" tool to convert it to postscript or some - other suitable format. -

-
- - -

- -dot-dom: Print dominance tree of function to 'dot' file -

-
-

- This pass, only available in opt, prints the dominator tree - into a .dot graph. This graph can then be processed with the - "dot" tool to convert it to postscript or some other suitable format. -

-
- - -

- -dot-dom-only: Print dominance tree of function to 'dot' file (with no function bodies) -

-
-

- This pass, only available in opt, prints the dominator tree - into a .dot graph, omitting the function bodies. This graph can - then be processed with the "dot" tool to convert it to postscript or some - other suitable format. -

-
- - -

- -dot-postdom: Print postdominance tree of function to 'dot' file -

-
-

- This pass, only available in opt, prints the post dominator tree - into a .dot graph. This graph can then be processed with the - "dot" tool to convert it to postscript or some other suitable format. -

-
- - -

- -dot-postdom-only: Print postdominance tree of function to 'dot' file (with no function bodies) -

-
-

- This pass, only available in opt, prints the post dominator tree - into a .dot graph, omitting the function bodies. This graph can - then be processed with the "dot" tool to convert it to postscript or some - other suitable format. -

-
- - -

- -globalsmodref-aa: Simple mod/ref analysis for globals -

-
-

- This simple pass provides alias and mod/ref information for global values - that do not have their address taken, and keeps track of whether functions - read or write memory (are "pure"). For this simple (but very common) case, - we can provide pretty accurate and useful information. -

-
- - -

- -instcount: Counts the various types of Instructions -

-
-

- This pass collects the count of all instructions and reports them. -

-
- - -

- -intervals: Interval Partition Construction -

-
-

- This analysis calculates and represents the interval partition of a function, - or a preexisting interval partition. -

- -

- In this way, the interval partition may be used to reduce a flow graph down - to its degenerate single node interval partition (unless it is irreducible). -

-
- - -

- -iv-users: Induction Variable Users -

-
-

Bookkeeping for "interesting" users of expressions computed from - induction variables.

-
- - -

- -lazy-value-info: Lazy Value Information Analysis -

-
-

Interface for lazy computation of value constraint information.

-
- - -

- -libcall-aa: LibCall Alias Analysis -

-
-

LibCall Alias Analysis.

-
- - -

- -lint: Statically lint-checks LLVM IR -

-
-

This pass statically checks for common and easily-identified constructs - which produce undefined or likely unintended behavior in LLVM IR.

- -

It is not a guarantee of correctness, in two ways. First, it isn't - comprehensive. There are checks which could be done statically which are - not yet implemented. Some of these are indicated by TODO comments, but - those aren't comprehensive either. Second, many conditions cannot be - checked statically. This pass does no dynamic instrumentation, so it - can't check for all possible problems.

- -

Another limitation is that it assumes all code will be executed. A store - through a null pointer in a basic block which is never reached is harmless, - but this pass will warn about it anyway.

- -

Optimization passes may make conditions that this pass checks for more or - less obvious. If an optimization pass appears to be introducing a warning, - it may be that the optimization pass is merely exposing an existing - condition in the code.

- -

This code may be run before instcombine. In many cases, instcombine checks - for the same kinds of things and turns instructions with undefined behavior - into unreachable (or equivalent). Because of this, this pass makes some - effort to look through bitcasts and so on. -

-
- - -

- -loops: Natural Loop Information -

-
-

- This analysis is used to identify natural loops and determine the loop depth - of various nodes of the CFG. Note that the loops identified may actually be - several natural loops that share the same header node... not just a single - natural loop. -

-
- - -

- -memdep: Memory Dependence Analysis -

-
-

- An analysis that determines, for a given memory operation, what preceding - memory operations it depends on. It builds on alias analysis information, and - tries to provide a lazy, caching interface to a common kind of alias - information query. -

-
- - -

- -module-debuginfo: Decodes module-level debug info -

-
-

This pass decodes the debug info metadata in a module and prints in a - (sufficiently-prepared-) human-readable form. - - For example, run this pass from opt along with the -analyze option, and - it'll print to standard output. -

-
- - -

- -no-aa: No Alias Analysis (always returns 'may' alias) -

-
-

- This is the default implementation of the Alias Analysis interface. It always - returns "I don't know" for alias queries. NoAA is unlike other alias analysis - implementations, in that it does not chain to a previous analysis. As such it - doesn't follow many of the rules that other alias analyses must. -

-
- - -

- -no-profile: No Profile Information -

-
-

- The default "no profile" implementation of the abstract - ProfileInfo interface. -

-
- - -

- -postdomfrontier: Post-Dominance Frontier Construction -

-
-

- This pass is a simple post-dominator construction algorithm for finding - post-dominator frontiers. -

-
- - -

- -postdomtree: Post-Dominator Tree Construction -

-
-

- This pass is a simple post-dominator construction algorithm for finding - post-dominators. -

-
- - -

- -print-alias-sets: Alias Set Printer -

-
-

Yet to be written.

-
- - -

- -print-callgraph: Print a call graph -

-
-

- This pass, only available in opt, prints the call graph to - standard error in a human-readable form. -

-
- - -

- -print-callgraph-sccs: Print SCCs of the Call Graph -

-
-

- This pass, only available in opt, prints the SCCs of the call - graph to standard error in a human-readable form. -

-
- - -

- -print-cfg-sccs: Print SCCs of each function CFG -

-
-

- This pass, only available in opt, prints the SCCs of each - function CFG to standard error in a human-readable form. -

-
- - -

- -print-dbginfo: Print debug info in human readable form -

-
-

Pass that prints instructions, and associated debug info:

-
    - -
  • source/line/col information
  • -
  • original variable name
  • -
  • original type name
  • -
-
- - -

- -print-dom-info: Dominator Info Printer -

-
-

Dominator Info Printer.

-
- - -

- -print-externalfnconstants: Print external fn callsites passed constants -

-
-

- This pass, only available in opt, prints out call sites to - external functions that are called with constant arguments. This can be - useful when looking for standard library functions we should constant fold - or handle in alias analyses. -

-
- - -

- -print-function: Print function to stderr -

-
-

- The PrintFunctionPass class is designed to be pipelined with - other FunctionPasses, and prints out the functions of the module - as they are processed. -

-
- - -

- -print-module: Print module to stderr -

-
-

- This pass simply prints out the entire module when it is executed. -

-
- - -

- -print-used-types: Find Used Types -

-
-

- This pass is used to seek out all of the types in use by the program. Note - that this analysis explicitly does not include types only used by the symbol - table. -

- - -

- -profile-estimator: Estimate profiling information -

-
-

Profiling information that estimates the profiling information - in a very crude and unimaginative way. -

-
- - -

- -profile-loader: Load profile information from llvmprof.out -

-
-

- A concrete implementation of profiling information that loads the information - from a profile dump file. -

-
- - -

- -profile-verifier: Verify profiling information -

-
-

Pass that checks profiling information for plausibility.

-
-

- -regions: Detect single entry single exit regions -

-
-

- The RegionInfo pass detects single entry single exit regions in a - function, where a region is defined as any subgraph that is connected to the - remaining graph at only two spots. Furthermore, a hierarchical region tree is - built. -

-
- - -

- -scalar-evolution: Scalar Evolution Analysis -

-
-

- The ScalarEvolution analysis can be used to analyze and - categorize scalar expressions in loops. It specializes in recognizing general - induction variables, representing them with the abstract and opaque - SCEV class. Given this analysis, trip counts of loops and other - important properties can be obtained. -

- -

- This analysis is primarily useful for induction variable substitution and - strength reduction. -

-
- - -

- -scev-aa: ScalarEvolution-based Alias Analysis -

-
-

Simple alias analysis implemented in terms of ScalarEvolution queries. - - This differs from traditional loop dependence analysis in that it tests - for dependencies within a single iteration of a loop, rather than - dependencies between different iterations. - - ScalarEvolution has a more complete understanding of pointer arithmetic - than BasicAliasAnalysis' collection of ad-hoc analyses. -

-
- - -

- -targetdata: Target Data Layout -

-
-

Provides other passes access to information on the size and alignment - required by the target ABI for various data types.

-
- -
- - -

Transform Passes

-
-

This section describes the LLVM Transform Passes.

- - -

- -adce: Aggressive Dead Code Elimination -

-
-

ADCE aggressively tries to eliminate code. This pass is similar to - DCE but it assumes that values are dead until proven - otherwise. This is similar to SCCP, except applied to - the liveness of values.

-
- - -

- -always-inline: Inliner for always_inline functions -

-
-

A custom inliner that handles only functions that are marked as - "always inline".

-
- - -

- -argpromotion: Promote 'by reference' arguments to scalars -

-
-

- This pass promotes "by reference" arguments to be "by value" arguments. In - practice, this means looking for internal functions that have pointer - arguments. If it can prove, through the use of alias analysis, that an - argument is *only* loaded, then it can pass the value into the function - instead of the address of the value. This can cause recursive simplification - of code and lead to the elimination of allocas (especially in C++ template - code like the STL). -

- -

- This pass also handles aggregate arguments that are passed into a function, - scalarizing them if the elements of the aggregate are only loaded. Note that - it refuses to scalarize aggregates which would require passing in more than - three operands to the function, because passing thousands of operands for a - large array or structure is unprofitable! -

- -

- Note that this transformation could also be done for arguments that are only - stored to (returning the value instead), but does not currently. This case - would be best handled when and if LLVM starts supporting multiple return - values from functions. -

-
- - -

- -bb-vectorize: Basic-Block Vectorization -

-
-

This pass combines instructions inside basic blocks to form vector - instructions. It iterates over each basic block, attempting to pair - compatible instructions, repeating this process until no additional - pairs are selected for vectorization. When the outputs of some pair - of compatible instructions are used as inputs by some other pair of - compatible instructions, those pairs are part of a potential - vectorization chain. Instruction pairs are only fused into vector - instructions when they are part of a chain longer than some - threshold length. Moreover, the pass attempts to find the best - possible chain for each pair of compatible instructions. These - heuristics are intended to prevent vectorization in cases where - it would not yield a performance increase of the resulting code. -

-
- - -

- -block-placement: Profile Guided Basic Block Placement -

-
-

This pass is a very simple profile guided basic block placement algorithm. - The idea is to put frequently executed blocks together at the start of the - function and hopefully increase the number of fall-through conditional - branches. If there is no profile information for a particular function, this - pass basically orders blocks in depth-first order.

-
- - -

- -break-crit-edges: Break critical edges in CFG -

-
-

- Break all of the critical edges in the CFG by inserting a dummy basic block. - It may be "required" by passes that cannot deal with critical edges. This - transformation obviously invalidates the CFG, but can update forward dominator - (set, immediate dominators, tree, and frontier) information. -

-
- - -

- -codegenprepare: Optimize for code generation -

-
- This pass munges the code in the input function to better prepare it for - SelectionDAG-based code generation. This works around limitations in its - basic-block-at-a-time approach. It should eventually be removed. -
- - -

- -constmerge: Merge Duplicate Global Constants -

-
-

- Merges duplicate global constants together into a single constant that is - shared. This is useful because some passes (e.g. TraceValues) insert a lot of - string constants into the program, regardless of whether or not an existing - string is available. -

-
- - -

- -constprop: Simple constant propagation -

-
-

This file implements constant propagation and merging. It looks for - instructions involving only constant operands and replaces them with a - constant value instead of an instruction. For example:

-
add i32 1, 2
-

becomes

-
i32 3
-

NOTE: this pass has a habit of making definitions be dead. It is a good - idea to run a DIE (Dead Instruction Elimination) pass - sometime after running this pass.

-
- - -

- -dce: Dead Code Elimination -

-
-

- Dead code elimination is similar to dead instruction - elimination, but it rechecks instructions that were used by removed - instructions to see if they are newly dead. -

-
- - -

- -deadargelim: Dead Argument Elimination -

-
-

- This pass deletes dead arguments from internal functions. Dead argument - elimination removes arguments which are directly dead, as well as arguments - only passed into function calls as dead arguments of other functions. This - pass also deletes dead return values in a similar way. -

- -

- This pass is often useful as a cleanup pass to run after aggressive - interprocedural passes, which add possibly-dead arguments. -

-
- - -

- -deadtypeelim: Dead Type Elimination -

-
-

- This pass is used to clean up the output of GCC. It eliminates names for types - that are unused in the entire translation unit, using the find used types pass. -

-
- - -

- -die: Dead Instruction Elimination -

-
-

- Dead instruction elimination performs a single pass over the function, - removing instructions that are obviously dead. -

-
- - -

- -dse: Dead Store Elimination -

-
-

- A trivial dead store elimination that only considers basic-block local - redundant stores. -

-
- - -

- -functionattrs: Deduce function attributes -

-
-

A simple interprocedural pass which walks the call-graph, looking for - functions which do not access or only read non-local memory, and marking them - readnone/readonly. In addition, it marks function arguments (of pointer type) - 'nocapture' if a call to the function does not create any copies of the pointer - value that outlive the call. This more or less means that the pointer is only - dereferenced, and not returned from the function or stored in a global. - This pass is implemented as a bottom-up traversal of the call-graph. -

-
- - -

- -globaldce: Dead Global Elimination -

-
-

- This transform is designed to eliminate unreachable internal globals from the - program. It uses an aggressive algorithm, searching out globals that are - known to be alive. After it finds all of the globals which are needed, it - deletes whatever is left over. This allows it to delete recursive chunks of - the program which are unreachable. -

-
- - -

- -globalopt: Global Variable Optimizer -

-
-

- This pass transforms simple global variables that never have their address - taken. If obviously true, it marks read/write globals as constant, deletes - variables only stored to, etc. -

-
- - -

- -gvn: Global Value Numbering -

-
-

- This pass performs global value numbering to eliminate fully and partially - redundant instructions. It also performs redundant load elimination. -

-
- - -

- -indvars: Canonicalize Induction Variables -

-
-

- This transformation analyzes and transforms the induction variables (and - computations derived from them) into simpler forms suitable for subsequent - analysis and transformation. -

- -

- This transformation makes the following changes to each loop with an - identifiable induction variable: -

- -
    -
  1. All loops are transformed to have a single canonical - induction variable which starts at zero and steps by one.
  2. -
  3. The canonical induction variable is guaranteed to be the first PHI node - in the loop header block.
  4. -
  5. Any pointer arithmetic recurrences are raised to use array - subscripts.
  6. -
- -

- If the trip count of a loop is computable, this pass also makes the following - changes: -

- -
    -
  1. The exit condition for the loop is canonicalized to compare the - induction value against the exit value. This turns loops like: -
    for (i = 7; i*i < 1000; ++i)
    - into -
    for (i = 0; i != 25; ++i)
  2. -
  3. Any use outside of the loop of an expression derived from the indvar - is changed to compute the derived value outside of the loop, eliminating - the dependence on the exit value of the induction variable. If the only - purpose of the loop is to compute the exit value of some derived - expression, this transformation will make the loop dead.
  4. -
- -

- This transformation should be followed by strength reduction after all of the - desired loop transformations have been performed. Additionally, on targets - where it is profitable, the loop could be transformed to count down to zero - (the "do loop" optimization). -

-
- - -

- -inline: Function Integration/Inlining -

-
-

- Bottom-up inlining of functions into callers. -

-
- - -

- -insert-edge-profiling: Insert instrumentation for edge profiling -

-
-

- This pass instruments the specified program with counters for edge profiling. - Edge profiling can give a reasonable approximation of the hot paths through a - program, and is used for a wide variety of program transformations. -

- -

- Note that this implementation is very naïve. It inserts a counter for - every edge in the program, instead of using control flow information - to prune the number of counters inserted. -

-
- - -

- -insert-optimal-edge-profiling: Insert optimal instrumentation for edge profiling -

-
-

This pass instruments the specified program with counters for edge profiling. - Edge profiling can give a reasonable approximation of the hot paths through a - program, and is used for a wide variety of program transformations. -

-
- - -

- -instcombine: Combine redundant instructions -

-
-

- Combine instructions to form fewer, simple - instructions. This pass does not modify the CFG. This pass is where algebraic - simplification happens. -

- -

- This pass combines things like: -

- -
%Y = add i32 %X, 1
-%Z = add i32 %Y, 1
- -

- into: -

- -
%Z = add i32 %X, 2
- -

- This is a simple worklist driven algorithm. -

- -

- This pass guarantees that the following canonicalizations are performed on - the program: -

- -
    -
  • If a binary operator has a constant operand, it is moved to the right- - hand side.
  • -
  • Bitwise operators with constant operands are always grouped so that - shifts are performed first, then ors, then - ands, then xors.
  • -
  • Compare instructions are converted from <, - >, ≤, or ≥ to - = or ≠ if possible.
  • -
  • All cmp instructions on boolean values are replaced with - logical operations.
  • -
  • add X, X is represented as - mul X, 2 ⇒ shl X, 1
  • -
  • Multiplies with a constant power-of-two argument are transformed into - shifts.
  • -
  • … etc.
  • -
-
- - -

- -internalize: Internalize Global Symbols -

-
-

- This pass loops over all of the functions in the input module, looking for a - main function. If a main function is found, all other functions and all - global variables with initializers are marked as internal. -

-
- - -

- -ipconstprop: Interprocedural constant propagation -

-
-

- This pass implements an extremely simple interprocedural constant - propagation pass. It could certainly be improved in many different ways, - like using a worklist. This pass makes arguments dead, but does not remove - them. The existing dead argument elimination pass should be run after this - to clean up the mess. -

-
- - -

- -ipsccp: Interprocedural Sparse Conditional Constant Propagation -

-
-

- An interprocedural variant of Sparse Conditional Constant - Propagation. -

-
- - -

- -jump-threading: Jump Threading -

-
-

- Jump threading tries to find distinct threads of control flow running through - a basic block. This pass looks at blocks that have multiple predecessors and - multiple successors. If one or more of the predecessors of the block can be - proven to always cause a jump to one of the successors, we forward the edge - from the predecessor to the successor by duplicating the contents of this - block. -

-

- An example of when this can occur is code like this: -

- -
if () { ...
-  X = 4;
-}
-if (X < 3) {
- -

- In this case, the unconditional branch at the end of the first if can be - revectored to the false side of the second if. -

-
- - -

- -lcssa: Loop-Closed SSA Form Pass -

-
-

- This pass transforms loops by placing phi nodes at the end of the loops for - all values that are live across the loop boundary. For example, it turns - the left into the right code: -

- -
for (...)                for (...)
-  if (c)                   if (c)
-    X1 = ...                 X1 = ...
-  else                     else
-    X2 = ...                 X2 = ...
-  X3 = phi(X1, X2)         X3 = phi(X1, X2)
-... = X3 + 4              X4 = phi(X3)
-                          ... = X4 + 4
- -

- This is still valid LLVM; the extra phi nodes are purely redundant, and will - be trivially eliminated by InstCombine. The major benefit of - this transformation is that it makes many other loop optimizations, such as - LoopUnswitching, simpler. -

-
- - -

- -licm: Loop Invariant Code Motion -

-
-

- This pass performs loop invariant code motion, attempting to remove as much - code from the body of a loop as possible. It does this by either hoisting - code into the preheader block, or by sinking code to the exit blocks if it is - safe. This pass also promotes must-aliased memory locations in the loop to - live in registers, thus hoisting and sinking "invariant" loads and stores. -

- -

- This pass uses alias analysis for two purposes: -

- -
    -
  • Moving loop invariant loads and calls out of loops. If we can determine - that a load or call inside of a loop never aliases anything stored to, - we can hoist it or sink it like any other instruction.
  • -
  • Scalar Promotion of Memory - If there is a store instruction inside of - the loop, we try to move the store to happen AFTER the loop instead of - inside of the loop. This can only happen if a few conditions are true: -
      -
    • The pointer stored through is loop invariant.
    • -
    • There are no stores or loads in the loop which may alias - the pointer. There are no calls in the loop which mod/ref the - pointer.
    • -
    - If these conditions are true, we can promote the loads and stores in the - loop of the pointer to use a temporary alloca'd variable. We then use - the mem2reg functionality to construct the appropriate SSA form for the - variable.
  • -
-
- - -

- -loop-deletion: Delete dead loops -

-
-

- This file implements the Dead Loop Deletion Pass. This pass is responsible - for eliminating loops with non-infinite computable trip counts that have no - side effects or volatile instructions, and do not contribute to the - computation of the function's return value. -

-
- - -

- -loop-extract: Extract loops into new functions -

-
-

- A pass wrapper around the ExtractLoop() scalar transformation to - extract each top-level loop into its own new function. If the loop is the - only loop in a given function, it is not touched. This is a pass most - useful for debugging via bugpoint. -

-
- - -

- -loop-extract-single: Extract at most one loop into a new function -

-
-

- Similar to Extract loops into new functions, - this pass extracts one natural loop from the program into a function if it - can. This is used by bugpoint. -

-
- - -

- -loop-reduce: Loop Strength Reduction -

-
-

- This pass performs a strength reduction on array references inside loops that - have as one or more of their components the loop induction variable. This is - accomplished by creating a new value to hold the initial value of the array - access for the first iteration, and then creating a new GEP instruction in - the loop to increment the value by the appropriate amount. -

-
- - -

- -loop-rotate: Rotate Loops -

-
-

A simple loop rotation transformation.

-
- - -

- -loop-simplify: Canonicalize natural loops -

-
-

- This pass performs several transformations to transform natural loops into a - simpler form, which makes subsequent analyses and transformations simpler and - more effective. -

- -

- Loop pre-header insertion guarantees that there is a single, non-critical - entry edge from outside of the loop to the loop header. This simplifies a - number of analyses and transformations, such as LICM. -

- -

- Loop exit-block insertion guarantees that all exit blocks from the loop - (blocks which are outside of the loop that have predecessors inside of the - loop) only have predecessors from inside of the loop (and are thus dominated - by the loop header). This simplifies transformations such as store-sinking - that are built into LICM. -

- -

- This pass also guarantees that loops will have exactly one backedge. -

- -

- Note that the simplifycfg pass will clean up blocks which are split out but - end up being unnecessary, so usage of this pass should not pessimize - generated code. -

- -

- This pass obviously modifies the CFG, but updates loop information and - dominator information. -

-
- - -

- -loop-unroll: Unroll loops -

-
-

- This pass implements a simple loop unroller. It works best when loops have - been canonicalized by the -indvars pass, - allowing it to determine the trip counts of loops easily. -

-
- - -

- -loop-unswitch: Unswitch loops -

-
-

- This pass transforms loops that contain branches on loop-invariant conditions - to have multiple loops. For example, it turns the left into the right code: -

- -
for (...)                  if (lic)
-  A                          for (...)
-  if (lic)                     A; B; C
-    B                      else
-  C                          for (...)
-                               A; C
- -

- This can increase the size of the code exponentially (doubling it every time - a loop is unswitched) so we only unswitch if the resultant code will be - smaller than a threshold. -

- -

- This pass expects LICM to be run before it to hoist invariant conditions out - of the loop, to make the unswitching opportunity obvious. -

-
- - -

- -loweratomic: Lower atomic intrinsics to non-atomic form -

-
-

- This pass lowers atomic intrinsics to non-atomic form for use in a known - non-preemptible environment. -

- -

- The pass does not verify that the environment is non-preemptible (in - general this would require knowledge of the entire call graph of the - program including any libraries which may not be available in bitcode form); - it simply lowers every atomic intrinsic. -

-
- - -

- -lowerinvoke: Lower invoke and unwind, for unwindless code generators -

-
-

- This transformation is designed for use by code generators which do not yet - support stack unwinding. This pass supports two models of exception handling - lowering, the 'cheap' support and the 'expensive' support. -

- -

- 'Cheap' exception handling support gives the program the ability to execute - any program which does not "throw an exception", by turning 'invoke' - instructions into calls and by turning 'unwind' instructions into calls to - abort(). If the program does dynamically use the unwind instruction, the - program will print a message then abort. -

- -

- 'Expensive' exception handling support gives the full exception handling - support to the program at the cost of making the 'invoke' instruction - really expensive. It basically inserts setjmp/longjmp calls to emulate the - exception handling as necessary. -

- -

- Because the 'expensive' support slows down programs a lot, and EH is only - used for a subset of the programs, it must be specifically enabled by the - -enable-correct-eh-support option. -

- -

- Note that after this pass runs the CFG is not entirely accurate (exceptional - control flow edges are not correct anymore) so only very simple things should - be done after the lowerinvoke pass has run (like generation of native code). - This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't - support the invoke instruction yet" lowering pass. -

-
- - -

- -lowerswitch: Lower SwitchInst's to branches -

-
-

- Rewrites switch instructions with a sequence of branches, which - allows targets to get away with not implementing the switch instruction until - it is convenient. -

-
- - -

- -mem2reg: Promote Memory to Register -

-
-

- This file promotes memory references to be register references. It promotes - alloca instructions which only have loads and - stores as uses. An alloca is transformed by using dominator - frontiers to place phi nodes, then traversing the function in - depth-first order to rewrite loads and stores as - appropriate. This is just the standard SSA construction algorithm to construct - "pruned" SSA form. -

-
- - -

- -memcpyopt: MemCpy Optimization -

-
-

- This pass performs various transformations related to eliminating memcpy - calls, or transforming sets of stores into memset's. -

-
- - -

- -mergefunc: Merge Functions -

-
-

This pass looks for equivalent functions that are mergeable and folds them. - - A hash is computed from the function, based on its type and number of - basic blocks. - - Once all hashes are computed, we perform an expensive equality comparison - on each function pair. This takes n^2/2 comparisons per bucket, so it's - important that the hash function be high quality. The equality comparison - iterates through each instruction in each basic block. - - When a match is found the functions are folded. If both functions are - overridable, we move the functionality into a new internal function and - leave two overridable thunks to it. -

-
- - -

- -mergereturn: Unify function exit nodes -

-
-

- Ensure that functions have at most one ret instruction in them. - Additionally, it keeps track of which node is the new exit node of the CFG. -

-
- - -

- -partial-inliner: Partial Inliner -

-
-

This pass performs partial inlining, typically by inlining an if - statement that surrounds the body of the function. -

-
- - -

- -prune-eh: Remove unused exception handling info -

-
-

- This file implements a simple interprocedural pass which walks the call-graph, - turning invoke instructions into call instructions if and - only if the callee cannot throw an exception. It implements this as a - bottom-up traversal of the call-graph. -

-
- - -

- -reassociate: Reassociate expressions -

-
-

- This pass reassociates commutative expressions in an order that is designed - to promote better constant propagation, GCSE, LICM, PRE, etc. -

- -

- For example: 4 + (x + 5) ⇒ x + (4 + 5) -

- -

- In the implementation of this algorithm, constants are assigned rank = 0, - function arguments are rank = 1, and other values are assigned ranks - corresponding to the reverse post order traversal of current function - (starting at 2), which effectively gives values in deep loops higher rank - than values not in loops. -

-
- - -

- -reg2mem: Demote all values to stack slots -

-
-

- This file demotes all registers to memory references. It is intended to be - the inverse of -mem2reg. By converting to - load instructions, the only values live across basic blocks are - alloca instructions and load instructions before - phi nodes. It is intended that this should make CFG hacking much - easier. To make later hacking easier, the entry block is split into two, such - that all introduced alloca instructions (and nothing else) are in the - entry block. -

-
- - -

- -scalarrepl: Scalar Replacement of Aggregates (DT) -

-
-

- The well-known scalar replacement of aggregates transformation. This - transform breaks up alloca instructions of aggregate type (structure - or array) into individual alloca instructions for each member if - possible. Then, if possible, it transforms the individual alloca - instructions into nice clean scalar SSA form. -

- -

- This combines a simple scalar replacement of aggregates algorithm with the mem2reg algorithm because often interact, - especially for C++ programs. As such, iterating between scalarrepl, - then mem2reg until we run out of things to - promote works well. -

-
- - -

- -sccp: Sparse Conditional Constant Propagation -

-
-

- Sparse conditional constant propagation and merging, which can be summarized - as: -

- -
    -
  1. Assumes values are constant unless proven otherwise
  2. -
  3. Assumes BasicBlocks are dead unless proven otherwise
  4. -
  5. Proves values to be constant, and replaces them with constants
  6. -
  7. Proves conditional branches to be unconditional
  8. -
- -

- Note that this pass has a habit of making definitions be dead. It is a good - idea to to run a DCE pass sometime after running this pass. -

-
- - -

- -simplify-libcalls: Simplify well-known library calls -

-
-

- Applies a variety of small optimizations for calls to specific well-known - function calls (e.g. runtime library functions). For example, a call - exit(3) that occurs within the main() function can be - transformed into simply return 3. -

-
- - -

- -simplifycfg: Simplify the CFG -

-
-

- Performs dead code elimination and basic block merging. Specifically: -

- -
    -
  1. Removes basic blocks with no predecessors.
  2. -
  3. Merges a basic block into its predecessor if there is only one and the - predecessor only has one successor.
  4. -
  5. Eliminates PHI nodes for basic blocks with a single predecessor.
  6. -
  7. Eliminates a basic block that only contains an unconditional - branch.
  8. -
-
- - -

- -sink: Code sinking -

-
-

This pass moves instructions into successor blocks, when possible, so that - they aren't executed on paths where their results aren't needed. -

-
- - -

- -sretpromotion: Promote sret arguments to multiple ret values -

-
-

- This pass finds functions that return a struct (using a pointer to the struct - as the first argument of the function, marked with the 'sret' attribute) and - replaces them with a new function that simply returns each of the elements of - that struct (using multiple return values). -

- -

- This pass works under a number of conditions: -

- -
    -
  • The returned struct must not contain other structs
  • -
  • The returned struct must only be used to load values from
  • -
  • The placeholder struct passed in is the result of an alloca
  • -
-
- - -

- -strip: Strip all symbols from a module -

-
-

- performs code stripping. this transformation can delete: -

- -
    -
  1. names for virtual registers
  2. -
  3. symbols for internal globals and functions
  4. -
  5. debug information
  6. -
- -

- note that this transformation makes code much less readable, so it should - only be used in situations where the strip utility would be used, - such as reducing code size or making it harder to reverse engineer code. -

-
- - -

- -strip-dead-debug-info: Strip debug info for unused symbols -

-
-

- performs code stripping. this transformation can delete: -

- -
    -
  1. names for virtual registers
  2. -
  3. symbols for internal globals and functions
  4. -
  5. debug information
  6. -
- -

- note that this transformation makes code much less readable, so it should - only be used in situations where the strip utility would be used, - such as reducing code size or making it harder to reverse engineer code. -

-
- - -

- -strip-dead-prototypes: Strip Unused Function Prototypes -

-
-

- This pass loops over all of the functions in the input module, looking for - dead declarations and removes them. Dead declarations are declarations of - functions for which no implementation is available (i.e., declarations for - unused library functions). -

-
- - -

- -strip-debug-declare: Strip all llvm.dbg.declare intrinsics -

-
-

This pass implements code stripping. Specifically, it can delete:

-
    -
  • names for virtual registers
  • -
  • symbols for internal globals and functions
  • -
  • debug information
  • -
-

- Note that this transformation makes code much less readable, so it should - only be used in situations where the 'strip' utility would be used, such as - reducing code size or making it harder to reverse engineer code. -

-
- - -

- -strip-nondebug: Strip all symbols, except dbg symbols, from a module -

-
-

This pass implements code stripping. Specifically, it can delete:

-
    -
  • names for virtual registers
  • -
  • symbols for internal globals and functions
  • -
  • debug information
  • -
-

- Note that this transformation makes code much less readable, so it should - only be used in situations where the 'strip' utility would be used, such as - reducing code size or making it harder to reverse engineer code. -

-
- - -

- -tailcallelim: Tail Call Elimination -

-
-

- This file transforms calls of the current function (self recursion) followed - by a return instruction with a branch to the entry of the function, creating - a loop. This pass also implements the following extensions to the basic - algorithm: -

- -
    -
  • Trivial instructions between the call and return do not prevent the - transformation from taking place, though currently the analysis cannot - support moving any really useful instructions (only dead ones). -
  • This pass transforms functions that are prevented from being tail - recursive by an associative expression to use an accumulator variable, - thus compiling the typical naive factorial or fib implementation - into efficient code. -
  • TRE is performed if the function returns void, if the return - returns the result returned by the call, or if the function returns a - run-time constant on all exits from the function. It is possible, though - unlikely, that the return returns something else (like constant 0), and - can still be TRE'd. It can be TRE'd if all other return - instructions in the function return the exact same value. -
  • If it can prove that callees do not access theier caller stack frame, - they are marked as eligible for tail call elimination (by the code - generator). -
-
- - -

Utility Passes

-
-

This section describes the LLVM Utility Passes.

- - -

- -deadarghaX0r: Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE) -

-
-

- Same as dead argument elimination, but deletes arguments to functions which - are external. This is only for use by bugpoint.

-
- - -

- -extract-blocks: Extract Basic Blocks From Module (for bugpoint use) -

-
-

- This pass is used by bugpoint to extract all blocks from the module into their - own functions.

-
- - -

- -instnamer: Assign names to anonymous instructions -

-
-

This is a little utility pass that gives instructions names, this is mostly - useful when diffing the effect of an optimization because deleting an - unnamed instruction can change all other instruction numbering, making the - diff very noisy. -

-
- - -

- -preverify: Preliminary module verification -

-
-

- Ensures that the module is in the form required by the Module Verifier pass. -

- -

- Running the verifier runs this pass automatically, so there should be no need - to use it directly. -

-
- - -

- -verify: Module Verifier -

-
-

- Verifies an LLVM IR code. This is useful to run after an optimization which is - undergoing testing. Note that llvm-as verifies its input before - emitting bitcode, and also that malformed bitcode is likely to make LLVM - crash. All language front-ends are therefore encouraged to verify their output - before performing optimizing transformations. -

- -
    -
  • Both of a binary operator's parameters are of the same type.
  • -
  • Verify that the indices of mem access instructions match other - operands.
  • -
  • Verify that arithmetic and other things are only performed on - first-class types. Verify that shifts and logicals only happen on - integrals f.e.
  • -
  • All of the constants in a switch statement are of the correct type.
  • -
  • The code is in valid SSA form.
  • -
  • It is illegal to put a label into any other type (like a structure) or - to return one.
  • -
  • Only phi nodes can be self referential: %x = add i32 %x, %x is - invalid.
  • -
  • PHI nodes must have an entry for each predecessor, with no extras.
  • -
  • PHI nodes must be the first thing in a basic block, all grouped - together.
  • -
  • PHI nodes must have at least one entry.
  • -
  • All basic blocks should only end with terminator insts, not contain - them.
  • -
  • The entry node to a function must not have predecessors.
  • -
  • All Instructions must be embedded into a basic block.
  • -
  • Functions cannot take a void-typed parameter.
  • -
  • Verify that a function's argument list agrees with its declared - type.
  • -
  • It is illegal to specify a name for a void value.
  • -
  • It is illegal to have an internal global value with no initializer.
  • -
  • It is illegal to have a ret instruction that returns a value that does - not agree with the function return value type.
  • -
  • Function call argument types match the function prototype.
  • -
  • All other things that are tested by asserts spread about the code.
  • -
- -

- Note that this does not provide full security verification (like Java), but - instead just tries to ensure that code is well-formed. -

-
- - -

- -view-cfg: View CFG of function -

-
-

- Displays the control flow graph using the GraphViz tool. -

-
- - -

- -view-cfg-only: View CFG of function (with no function bodies) -

-
-

- Displays the control flow graph using the GraphViz tool, but omitting function - bodies. -

-
- - -

- -view-dom: View dominance tree of function -

-
-

- Displays the dominator tree using the GraphViz tool. -

-
- - -

- -view-dom-only: View dominance tree of function (with no function bodies) -

-
-

- Displays the dominator tree using the GraphViz tool, but omitting function - bodies. -

-
- - -

- -view-postdom: View postdominance tree of function -

-
-

- Displays the post dominator tree using the GraphViz tool. -

-
- - -

- -view-postdom-only: View postdominance tree of function (with no function bodies) -

-
-

- Displays the post dominator tree using the GraphViz tool, but omitting - function bodies. -

-
- -
- - - -
-
- Valid CSS - Valid HTML 4.01 - - Reid Spencer
- LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-31 18:25:31 +0100 (Wed, 31 Oct 2012) $ -
- - - diff --git a/docs/Passes.rst b/docs/Passes.rst new file mode 100644 index 000000000000..d279eca3afb6 --- /dev/null +++ b/docs/Passes.rst @@ -0,0 +1,1261 @@ +.. + If Passes.html is up to date, the following "one-liner" should print + an empty diff. + + egrep -e '^-.*.*$' \ + -e '^ .*$' < Passes.html >html; \ + perl >help <<'EOT' && diff -u help html; rm -f help html + open HTML, ") { + m:^-.*.*$: or next; + $order{$1} = sprintf("%03d", 1 + int %order); + } + open HELP, "../Release/bin/opt -help|" or die "open: opt -help: $!\n"; + while () { + m:^ -([^ ]+) +- (.*)$: or next; + my $o = $order{$1}; + $o = "000" unless defined $o; + push @x, "$o-$1$2\n"; + push @y, "$o -$1: $2\n"; + } + @x = map { s/^\d\d\d//; $_ } sort @x; + @y = map { s/^\d\d\d//; $_ } sort @y; + print @x, @y; + EOT + + This (real) one-liner can also be helpful when converting comments to HTML: + + perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if !$on && $_ =~ /\S/; print "

\n" if $on && $_ =~ /^\s*$/; print " $_\n"; $on = ($_ =~ /\S/); } print "

\n" if $on' + +==================================== +LLVM's Analysis and Transform Passes +==================================== + +.. contents:: + :local: + +Introduction +============ + +This document serves as a high level summary of the optimization features that +LLVM provides. Optimizations are implemented as Passes that traverse some +portion of a program to either collect information or transform the program. +The table below divides the passes that LLVM provides into three categories. +Analysis passes compute information that other passes can use or for debugging +or program visualization purposes. Transform passes can use (or invalidate) +the analysis passes. Transform passes all mutate the program in some way. +Utility passes provide some utility but don't otherwise fit categorization. +For example passes to extract functions to bitcode or write a module to bitcode +are neither analysis nor transform passes. The table of contents above +provides a quick summary of each pass and links to the more complete pass +description later in the document. + +Analysis Passes +=============== + +This section describes the LLVM Analysis Passes. + +``-aa-eval``: Exhaustive Alias Analysis Precision Evaluator +----------------------------------------------------------- + +This is a simple N^2 alias analysis accuracy evaluator. Basically, for each +function in the program, it simply queries to see how the alias analysis +implementation answers alias queries between each pair of pointers in the +function. + +This is inspired and adapted from code by: Naveen Neelakantam, Francesco +Spadini, and Wojciech Stryjewski. + +``-basicaa``: Basic Alias Analysis (stateless AA impl) +------------------------------------------------------ + +A basic alias analysis pass that implements identities (two different globals +cannot alias, etc), but does no stateful analysis. + +``-basiccg``: Basic CallGraph Construction +------------------------------------------ + +Yet to be written. 
+ +``-count-aa``: Count Alias Analysis Query Responses +--------------------------------------------------- + +A pass which can be used to count how many alias queries are being made and how +the alias analysis implementation being used responds. + +``-da``: Dependence Analysis +---------------------------- + +Dependence analysis framework, which is used to detect dependences in memory +accesses. + +``-debug-aa``: AA use debugger +------------------------------ + +This simple pass checks alias analysis users to ensure that if they create a +new value, they do not query AA without informing it of the value. It acts as +a shim over any other AA pass you want. + +Yes keeping track of every value in the program is expensive, but this is a +debugging pass. + +``-domfrontier``: Dominance Frontier Construction +------------------------------------------------- + +This pass is a simple dominator construction algorithm for finding forward +dominator frontiers. + +``-domtree``: Dominator Tree Construction +----------------------------------------- + +This pass is a simple dominator construction algorithm for finding forward +dominators. + + +``-dot-callgraph``: Print Call Graph to "dot" file +-------------------------------------------------- + +This pass, only available in ``opt``, prints the call graph into a ``.dot`` +graph. This graph can then be processed with the "dot" tool to convert it to +postscript or some other suitable format. + +``-dot-cfg``: Print CFG of function to "dot" file +------------------------------------------------- + +This pass, only available in ``opt``, prints the control flow graph into a +``.dot`` graph. This graph can then be processed with the :program:`dot` tool +to convert it to postscript or some other suitable format. 
+ +``-dot-cfg-only``: Print CFG of function to "dot" file (with no function bodies) +-------------------------------------------------------------------------------- + +This pass, only available in ``opt``, prints the control flow graph into a +``.dot`` graph, omitting the function bodies. This graph can then be processed +with the :program:`dot` tool to convert it to postscript or some other suitable +format. + +``-dot-dom``: Print dominance tree of function to "dot" file +------------------------------------------------------------ + +This pass, only available in ``opt``, prints the dominator tree into a ``.dot`` +graph. This graph can then be processed with the :program:`dot` tool to +convert it to postscript or some other suitable format. + +``-dot-dom-only``: Print dominance tree of function to "dot" file (with no function bodies) +------------------------------------------------------------------------------------------- + +This pass, only available in ``opt``, prints the dominator tree into a ``.dot`` +graph, omitting the function bodies. This graph can then be processed with the +:program:`dot` tool to convert it to postscript or some other suitable format. + +``-dot-postdom``: Print postdominance tree of function to "dot" file +-------------------------------------------------------------------- + +This pass, only available in ``opt``, prints the post dominator tree into a +``.dot`` graph. This graph can then be processed with the :program:`dot` tool +to convert it to postscript or some other suitable format. + +``-dot-postdom-only``: Print postdominance tree of function to "dot" file (with no function bodies) +--------------------------------------------------------------------------------------------------- + +This pass, only available in ``opt``, prints the post dominator tree into a +``.dot`` graph, omitting the function bodies. This graph can then be processed +with the :program:`dot` tool to convert it to postscript or some other suitable +format. 
+ +``-globalsmodref-aa``: Simple mod/ref analysis for globals +---------------------------------------------------------- + +This simple pass provides alias and mod/ref information for global values that +do not have their address taken, and keeps track of whether functions read or +write memory (are "pure"). For this simple (but very common) case, we can +provide pretty accurate and useful information. + +``-instcount``: Counts the various types of ``Instruction``\ s +-------------------------------------------------------------- + +This pass collects the count of all instructions and reports them. + +``-intervals``: Interval Partition Construction +----------------------------------------------- + +This analysis calculates and represents the interval partition of a function, +or a preexisting interval partition. + +In this way, the interval partition may be used to reduce a flow graph down to +its degenerate single node interval partition (unless it is irreducible). + +``-iv-users``: Induction Variable Users +--------------------------------------- + +Bookkeeping for "interesting" users of expressions computed from induction +variables. + +``-lazy-value-info``: Lazy Value Information Analysis +----------------------------------------------------- + +Interface for lazy computation of value constraint information. + +``-libcall-aa``: LibCall Alias Analysis +--------------------------------------- + +LibCall Alias Analysis. + +``-lint``: Statically lint-checks LLVM IR +----------------------------------------- + +This pass statically checks for common and easily-identified constructs which +produce undefined or likely unintended behavior in LLVM IR. + +It is not a guarantee of correctness, in two ways. First, it isn't +comprehensive. There are checks which could be done statically which are not +yet implemented. Some of these are indicated by TODO comments, but those +aren't comprehensive either. Second, many conditions cannot be checked +statically. 
This pass does no dynamic instrumentation, so it can't check for +all possible problems. + +Another limitation is that it assumes all code will be executed. A store +through a null pointer in a basic block which is never reached is harmless, but +this pass will warn about it anyway. + +Optimization passes may make conditions that this pass checks for more or less +obvious. If an optimization pass appears to be introducing a warning, it may +be that the optimization pass is merely exposing an existing condition in the +code. + +This code may be run before :ref:`instcombine <passes-instcombine>`. In many +cases, instcombine checks for the same kinds of things and turns instructions +with undefined behavior into unreachable (or equivalent). Because of this, +this pass makes some effort to look through bitcasts and so on. + +``-loops``: Natural Loop Information +------------------------------------ + +This analysis is used to identify natural loops and determine the loop depth of +various nodes of the CFG. Note that the loops identified may actually be +several natural loops that share the same header node... not just a single +natural loop. + +``-memdep``: Memory Dependence Analysis +--------------------------------------- + +An analysis that determines, for a given memory operation, what preceding +memory operations it depends on. It builds on alias analysis information, and +tries to provide a lazy, caching interface to a common kind of alias +information query. + +``-module-debuginfo``: Decodes module-level debug info +------------------------------------------------------ + +This pass decodes the debug info metadata in a module and prints it in a +(sufficiently-prepared-) human-readable form. + +For example, run this pass from ``opt`` along with the ``-analyze`` option, and +it'll print to standard output. 
+ +``-no-aa``: No Alias Analysis (always returns 'may' alias) +---------------------------------------------------------- + +This is the default implementation of the Alias Analysis interface. It always +returns "I don't know" for alias queries. NoAA is unlike other alias analysis +implementations, in that it does not chain to a previous analysis. As such it +doesn't follow many of the rules that other alias analyses must. + +``-no-profile``: No Profile Information +--------------------------------------- + +The default "no profile" implementation of the abstract ``ProfileInfo`` +interface. + +``-postdomfrontier``: Post-Dominance Frontier Construction +---------------------------------------------------------- + +This pass is a simple post-dominator construction algorithm for finding +post-dominator frontiers. + +``-postdomtree``: Post-Dominator Tree Construction +-------------------------------------------------- + +This pass is a simple post-dominator construction algorithm for finding +post-dominators. + +``-print-alias-sets``: Alias Set Printer +---------------------------------------- + +Yet to be written. + +``-print-callgraph``: Print a call graph +---------------------------------------- + +This pass, only available in ``opt``, prints the call graph to standard error +in a human-readable form. + +``-print-callgraph-sccs``: Print SCCs of the Call Graph +------------------------------------------------------- + +This pass, only available in ``opt``, prints the SCCs of the call graph to +standard error in a human-readable form. + +``-print-cfg-sccs``: Print SCCs of each function CFG +---------------------------------------------------- + +This pass, only available in ``opt``, prints the SCCs of each function CFG to +standard error in a human-readable form. + +``-print-dbginfo``: Print debug info in human readable form +----------------------------------------------------------- + +Pass that prints instructions, and associated debug info: + +#. 
source/line/col information +#. original variable name +#. original type name + +``-print-dom-info``: Dominator Info Printer +------------------------------------------- + +Dominator Info Printer. + +``-print-externalfnconstants``: Print external fn callsites passed constants +---------------------------------------------------------------------------- + +This pass, only available in ``opt``, prints out call sites to external +functions that are called with constant arguments. This can be useful when +looking for standard library functions we should constant fold or handle in +alias analyses. + +``-print-function``: Print function to stderr +--------------------------------------------- + +The ``PrintFunctionPass`` class is designed to be pipelined with other +``FunctionPasses``, and prints out the functions of the module as they are +processed. + +``-print-module``: Print module to stderr +----------------------------------------- + +This pass simply prints out the entire module when it is executed. + +.. _passes-print-used-types: + +``-print-used-types``: Find Used Types +-------------------------------------- + +This pass is used to seek out all of the types in use by the program. Note +that this analysis explicitly does not include types only used by the symbol +table. + +``-profile-estimator``: Estimate profiling information +------------------------------------------------------ + +Profiling information that estimates the profiling information in a very crude +and unimaginative way. + +``-profile-loader``: Load profile information from ``llvmprof.out`` +------------------------------------------------------------------- + +A concrete implementation of profiling information that loads the information +from a profile dump file. + +``-profile-verifier``: Verify profiling information +--------------------------------------------------- + +Pass that checks profiling information for plausibility. 
+ +``-regions``: Detect single entry single exit regions +----------------------------------------------------- + +The ``RegionInfo`` pass detects single entry single exit regions in a function, +where a region is defined as any subgraph that is connected to the remaining +graph at only two spots. Furthermore, a hierarchical region tree is built. + +``-scalar-evolution``: Scalar Evolution Analysis +------------------------------------------------ + +The ``ScalarEvolution`` analysis can be used to analyze and categorize scalar +expressions in loops. It specializes in recognizing general induction +variables, representing them with the abstract and opaque ``SCEV`` class. +Given this analysis, trip counts of loops and other important properties can be +obtained. + +This analysis is primarily useful for induction variable substitution and +strength reduction. + +``-scev-aa``: ScalarEvolution-based Alias Analysis +-------------------------------------------------- + +Simple alias analysis implemented in terms of ``ScalarEvolution`` queries. + +This differs from traditional loop dependence analysis in that it tests for +dependencies within a single iteration of a loop, rather than dependencies +between different iterations. + +``ScalarEvolution`` has a more complete understanding of pointer arithmetic +than ``BasicAliasAnalysis``' collection of ad-hoc analyses. + +``-targetdata``: Target Data Layout +----------------------------------- + +Provides other passes access to information on the size and alignment +required by the target ABI for various data types. + +Transform Passes +================ + +This section describes the LLVM Transform Passes. + +``-adce``: Aggressive Dead Code Elimination +------------------------------------------- + +ADCE aggressively tries to eliminate code. This pass is similar to :ref:`DCE +<passes-dce>` but it assumes that values are dead until proven otherwise. This +is similar to :ref:`SCCP <passes-sccp>`, except applied to the liveness of +values. 
+ +``-always-inline``: Inliner for ``always_inline`` functions +----------------------------------------------------------- + +A custom inliner that handles only functions that are marked as "always +inline". + +``-argpromotion``: Promote 'by reference' arguments to scalars +-------------------------------------------------------------- + +This pass promotes "by reference" arguments to be "by value" arguments. In +practice, this means looking for internal functions that have pointer +arguments. If it can prove, through the use of alias analysis, that an +argument is *only* loaded, then it can pass the value into the function instead +of the address of the value. This can cause recursive simplification of code +and lead to the elimination of allocas (especially in C++ template code like +the STL). + +This pass also handles aggregate arguments that are passed into a function, +scalarizing them if the elements of the aggregate are only loaded. Note that +it refuses to scalarize aggregates which would require passing in more than +three operands to the function, because passing thousands of operands for a +large array or structure is unprofitable! + +Note that this transformation could also be done for arguments that are only +stored to (returning the value instead), but does not currently. This case +would be best handled when and if LLVM starts supporting multiple return values +from functions. + +``-bb-vectorize``: Basic-Block Vectorization +-------------------------------------------- + +This pass combines instructions inside basic blocks to form vector +instructions. It iterates over each basic block, attempting to pair compatible +instructions, repeating this process until no additional pairs are selected for +vectorization. When the outputs of some pair of compatible instructions are +used as inputs by some other pair of compatible instructions, those pairs are +part of a potential vectorization chain. 
Instruction pairs are only fused into +vector instructions when they are part of a chain longer than some threshold +length. Moreover, the pass attempts to find the best possible chain for each +pair of compatible instructions. These heuristics are intended to prevent +vectorization in cases where it would not yield a performance increase of the +resulting code. + +``-block-placement``: Profile Guided Basic Block Placement +---------------------------------------------------------- + +This pass is a very simple profile guided basic block placement algorithm. The +idea is to put frequently executed blocks together at the start of the function +and hopefully increase the number of fall-through conditional branches. If +there is no profile information for a particular function, this pass basically +orders blocks in depth-first order. + +``-break-crit-edges``: Break critical edges in CFG +-------------------------------------------------- + +Break all of the critical edges in the CFG by inserting a dummy basic block. +It may be "required" by passes that cannot deal with critical edges. This +transformation obviously invalidates the CFG, but can update forward dominator +(set, immediate dominators, tree, and frontier) information. + +``-codegenprepare``: Optimize for code generation +------------------------------------------------- + +This pass munges the code in the input function to better prepare it for +SelectionDAG-based code generation. This works around limitations in its +basic-block-at-a-time approach. It should eventually be removed. + +``-constmerge``: Merge Duplicate Global Constants +------------------------------------------------- + +Merges duplicate global constants together into a single constant that is +shared. This is useful because some passes (e.g., TraceValues) insert a lot of +string constants into the program, regardless of whether or not an existing +string is available. 
+ +``-constprop``: Simple constant propagation +------------------------------------------- + +This file implements constant propagation and merging. It looks for +instructions involving only constant operands and replaces them with a constant +value instead of an instruction. For example: + +.. code-block:: llvm + + add i32 1, 2 + +becomes + +.. code-block:: llvm + + i32 3 + +NOTE: this pass has a habit of making definitions be dead. It is a good idea +to run a :ref:`Dead Instruction Elimination <passes-die>` pass sometime +after running this pass. + +.. _passes-dce: + +``-dce``: Dead Code Elimination +------------------------------- + +Dead code elimination is similar to :ref:`dead instruction elimination +<passes-die>`, but it rechecks instructions that were used by removed +instructions to see if they are newly dead. + +``-deadargelim``: Dead Argument Elimination +------------------------------------------- + +This pass deletes dead arguments from internal functions. Dead argument +elimination removes arguments which are directly dead, as well as arguments +only passed into function calls as dead arguments of other functions. This +pass also deletes dead arguments in a similar way. + +This pass is often useful as a cleanup pass to run after aggressive +interprocedural passes, which add possibly-dead arguments. + +``-deadtypeelim``: Dead Type Elimination +---------------------------------------- + +This pass is used to clean up the output of GCC. It eliminates names for types +that are unused in the entire translation unit, using the :ref:`find used types +<passes-print-used-types>` pass. + +.. _passes-die: + +``-die``: Dead Instruction Elimination +-------------------------------------- + +Dead instruction elimination performs a single pass over the function, removing +instructions that are obviously dead. + +``-dse``: Dead Store Elimination +-------------------------------- + +A trivial dead store elimination that only considers basic-block local +redundant stores. 
+ +``-functionattrs``: Deduce function attributes +---------------------------------------------- + +A simple interprocedural pass which walks the call-graph, looking for functions +which do not access or only read non-local memory, and marking them +``readnone``/``readonly``. In addition, it marks function arguments (of +pointer type) "``nocapture``" if a call to the function does not create any +copies of the pointer value that outlive the call. This more or less means +that the pointer is only dereferenced, and not returned from the function or +stored in a global. This pass is implemented as a bottom-up traversal of the +call-graph. + +``-globaldce``: Dead Global Elimination +--------------------------------------- + +This transform is designed to eliminate unreachable internal globals from the +program. It uses an aggressive algorithm, searching out globals that are known +to be alive. After it finds all of the globals which are needed, it deletes +whatever is left over. This allows it to delete recursive chunks of the +program which are unreachable. + +``-globalopt``: Global Variable Optimizer +----------------------------------------- + +This pass transforms simple global variables that never have their address +taken. If obviously true, it marks read/write globals as constant, deletes +variables only stored to, etc. + +``-gvn``: Global Value Numbering +-------------------------------- + +This pass performs global value numbering to eliminate fully and partially +redundant instructions. It also performs redundant load elimination. + +.. _passes-indvars: + +``-indvars``: Canonicalize Induction Variables +---------------------------------------------- + +This transformation analyzes and transforms the induction variables (and +computations derived from them) into simpler forms suitable for subsequent +analysis and transformation. 
+ +This transformation makes the following changes to each loop with an +identifiable induction variable: + +* All loops are transformed to have a *single* canonical induction variable + which starts at zero and steps by one. +* The canonical induction variable is guaranteed to be the first PHI node in + the loop header block. +* Any pointer arithmetic recurrences are raised to use array subscripts. + +If the trip count of a loop is computable, this pass also makes the following +changes: + +* The exit condition for the loop is canonicalized to compare the induction + value against the exit value. This turns loops like: + + .. code-block:: c++ + + for (i = 7; i*i < 1000; ++i) + + into + + .. code-block:: c++ + + for (i = 0; i != 25; ++i) + +* Any use outside of the loop of an expression derived from the indvar is + changed to compute the derived value outside of the loop, eliminating the + dependence on the exit value of the induction variable. If the only purpose + of the loop is to compute the exit value of some derived expression, this + transformation will make the loop dead. + +This transformation should be followed by strength reduction after all of the +desired loop transformations have been performed. Additionally, on targets +where it is profitable, the loop could be transformed to count down to zero +(the "do loop" optimization). + +``-inline``: Function Integration/Inlining +------------------------------------------ + +Bottom-up inlining of functions into callers. + +``-insert-edge-profiling``: Insert instrumentation for edge profiling +--------------------------------------------------------------------- + +This pass instruments the specified program with counters for edge profiling. +Edge profiling can give a reasonable approximation of the hot paths through a +program, and is used for a wide variety of program transformations. + +Note that this implementation is very naïve.
It inserts a counter for *every* +edge in the program, instead of using control flow information to prune the +number of counters inserted. + +``-insert-optimal-edge-profiling``: Insert optimal instrumentation for edge profiling +------------------------------------------------------------------------------------- + +This pass instruments the specified program with counters for edge profiling. +Edge profiling can give a reasonable approximation of the hot paths through a +program, and is used for a wide variety of program transformations. + +.. _passes-instcombine: + +``-instcombine``: Combine redundant instructions +------------------------------------------------ + +Combine instructions to form fewer, simple instructions. This pass does not +modify the CFG. This pass is where algebraic simplification happens. + +This pass combines things like: + +.. code-block:: llvm + + %Y = add i32 %X, 1 + %Z = add i32 %Y, 1 + +into: + +.. code-block:: llvm + + %Z = add i32 %X, 2 + +This is a simple worklist driven algorithm. + +This pass guarantees that the following canonicalizations are performed on the +program: + +#. If a binary operator has a constant operand, it is moved to the right-hand + side. +#. Bitwise operators with constant operands are always grouped so that shifts + are performed first, then ``or``\ s, then ``and``\ s, then ``xor``\ s. +#. Compare instructions are converted from ``<``, ``>``, ``≤``, or ``≥`` to + ``=`` or ``≠`` if possible. +#. All ``cmp`` instructions on boolean values are replaced with logical + operations. +#. ``add X, X`` is represented as ``mul X, 2`` ⇒ ``shl X, 1`` +#. Multiplies with a constant power-of-two argument are transformed into + shifts. +#. … etc. + +``-internalize``: Internalize Global Symbols +-------------------------------------------- + +This pass loops over all of the functions in the input module, looking for a +main function.
If a main function is found, all other functions and all global +variables with initializers are marked as internal. + +``-ipconstprop``: Interprocedural constant propagation +------------------------------------------------------ + +This pass implements an *extremely* simple interprocedural constant propagation +pass. It could certainly be improved in many different ways, like using a +worklist. This pass makes arguments dead, but does not remove them. The +existing dead argument elimination pass should be run after this to clean up +the mess. + +``-ipsccp``: Interprocedural Sparse Conditional Constant Propagation +-------------------------------------------------------------------- + +An interprocedural variant of :ref:`Sparse Conditional Constant Propagation +`. + +``-jump-threading``: Jump Threading +----------------------------------- + +Jump threading tries to find distinct threads of control flow running through a +basic block. This pass looks at blocks that have multiple predecessors and +multiple successors. If one or more of the predecessors of the block can be +proven to always cause a jump to one of the successors, we forward the edge +from the predecessor to the successor by duplicating the contents of this +block. + +An example of when this can occur is code like this: + +.. code-block:: c++ + + if () { ... + X = 4; + } + if (X < 3) { + +In this case, the unconditional branch at the end of the first if can be +revectored to the false side of the second if. + +``-lcssa``: Loop-Closed SSA Form Pass +------------------------------------- + +This pass transforms loops by placing phi nodes at the end of the loops for all +values that are live across the loop boundary. For example, it turns the left +into the right code: + +.. code-block:: c++ + + for (...) for (...) + if (c) if (c) + X1 = ... X1 = ... + else else + X2 = ... X2 = ... + X3 = phi(X1, X2) X3 = phi(X1, X2) + ... = X3 + 4 X4 = phi(X3) + ... 
= X4 + 4 + +This is still valid LLVM; the extra phi nodes are purely redundant, and will be +trivially eliminated by ``InstCombine``. The major benefit of this +transformation is that it makes many other loop optimizations, such as +``LoopUnswitch``\ ing, simpler. + +.. _passes-licm: + +``-licm``: Loop Invariant Code Motion +------------------------------------- + +This pass performs loop invariant code motion, attempting to remove as much +code from the body of a loop as possible. It does this by either hoisting code +into the preheader block, or by sinking code to the exit blocks if it is safe. +This pass also promotes must-aliased memory locations in the loop to live in +registers, thus hoisting and sinking "invariant" loads and stores. + +This pass uses alias analysis for two purposes: + +#. Moving loop invariant loads and calls out of loops. If we can determine + that a load or call inside of a loop never aliases anything stored to, we + can hoist it or sink it like any other instruction. + +#. Scalar Promotion of Memory. If there is a store instruction inside of the + loop, we try to move the store to happen AFTER the loop instead of inside of + the loop. This can only happen if a few conditions are true: + + #. The pointer stored through is loop invariant. + #. There are no stores or loads in the loop which *may* alias the pointer. + There are no calls in the loop which mod/ref the pointer. + + If these conditions are true, we can promote the loads and stores in the + loop of the pointer to use a temporary alloca'd variable. We then use the + :ref:`mem2reg ` functionality to construct the appropriate + SSA form for the variable. + +``-loop-deletion``: Delete dead loops +------------------------------------- + +This file implements the Dead Loop Deletion Pass. 
This pass is responsible for +eliminating loops with non-infinite computable trip counts that have no side +effects or volatile instructions, and do not contribute to the computation of +the function's return value. + +.. _passes-loop-extract: + +``-loop-extract``: Extract loops into new functions +--------------------------------------------------- + +A pass wrapper around the ``ExtractLoop()`` scalar transformation to extract +each top-level loop into its own new function. If the loop is the *only* loop +in a given function, it is not touched. This is a pass most useful for +debugging via bugpoint. + +``-loop-extract-single``: Extract at most one loop into a new function +---------------------------------------------------------------------- + +Similar to :ref:`Extract loops into new functions `, this +pass extracts one natural loop from the program into a function if it can. +This is used by :program:`bugpoint`. + +``-loop-reduce``: Loop Strength Reduction +----------------------------------------- + +This pass performs a strength reduction on array references inside loops that +have as one or more of their components the loop induction variable. This is +accomplished by creating a new value to hold the initial value of the array +access for the first iteration, and then creating a new GEP instruction in the +loop to increment the value by the appropriate amount. + +``-loop-rotate``: Rotate Loops +------------------------------ + +A simple loop rotation transformation. + +``-loop-simplify``: Canonicalize natural loops +---------------------------------------------- + +This pass performs several transformations to transform natural loops into a +simpler form, which makes subsequent analyses and transformations simpler and +more effective. + +Loop pre-header insertion guarantees that there is a single, non-critical entry +edge from outside of the loop to the loop header. This simplifies a number of +analyses and transformations, such as :ref:`LICM `. 
+ +Loop exit-block insertion guarantees that all exit blocks from the loop (blocks +which are outside of the loop that have predecessors inside of the loop) only +have predecessors from inside of the loop (and are thus dominated by the loop +header). This simplifies transformations such as store-sinking that are built +into LICM. + +This pass also guarantees that loops will have exactly one backedge. + +Note that the :ref:`simplifycfg ` pass will clean up blocks +which are split out but end up being unnecessary, so usage of this pass should +not pessimize generated code. + +This pass obviously modifies the CFG, but updates loop information and +dominator information. + +``-loop-unroll``: Unroll loops +------------------------------ + +This pass implements a simple loop unroller. It works best when loops have +been canonicalized by the :ref:`indvars ` pass, allowing it to +determine the trip counts of loops easily. + +``-loop-unswitch``: Unswitch loops +---------------------------------- + +This pass transforms loops that contain branches on loop-invariant conditions +to have multiple loops. For example, it turns the left into the right code: + +.. code-block:: c++ + + for (...) if (lic) + A for (...) + if (lic) A; B; C + B else + C for (...) + A; C + +This can increase the size of the code exponentially (doubling it every time a +loop is unswitched) so we only unswitch if the resultant code will be smaller +than a threshold. + +This pass expects :ref:`LICM ` to be run before it to hoist +invariant conditions out of the loop, to make the unswitching opportunity +obvious. + +``-loweratomic``: Lower atomic intrinsics to non-atomic form +------------------------------------------------------------ + +This pass lowers atomic intrinsics to non-atomic form for use in a known +non-preemptible environment. 
+ +The pass does not verify that the environment is non-preemptible (in general +this would require knowledge of the entire call graph of the program including +any libraries which may not be available in bitcode form); it simply lowers +every atomic intrinsic. + +``-lowerinvoke``: Lower invoke and unwind, for unwindless code generators +------------------------------------------------------------------------- + +This transformation is designed for use by code generators which do not yet +support stack unwinding. This pass supports two models of exception handling +lowering, the "cheap" support and the "expensive" support. + +"Cheap" exception handling support gives the program the ability to execute any +program which does not "throw an exception", by turning "``invoke``" +instructions into calls and by turning "``unwind``" instructions into calls to +``abort()``. If the program does dynamically use the "``unwind``" instruction, +the program will print a message then abort. + +"Expensive" exception handling support gives the full exception handling +support to the program at the cost of making the "``invoke``" instruction +really expensive. It basically inserts ``setjmp``/``longjmp`` calls to emulate +the exception handling as necessary. + +Because the "expensive" support slows down programs a lot, and EH is only used +for a subset of the programs, it must be specifically enabled by the +``-enable-correct-eh-support`` option. + +Note that after this pass runs the CFG is not entirely accurate (exceptional +control flow edges are not correct anymore) so only very simple things should +be done after the ``lowerinvoke`` pass has run (like generation of native +code). This should not be used as a general purpose "my LLVM-to-LLVM pass +doesn't support the ``invoke`` instruction yet" lowering pass. 
+ +``-lowerswitch``: Lower ``SwitchInst``\ s to branches +----------------------------------------------------- + +Rewrites switch instructions with a sequence of branches, which allows targets +to get away with not implementing the switch instruction until it is +convenient. + +.. _passes-mem2reg: + +``-mem2reg``: Promote Memory to Register +---------------------------------------- + +This file promotes memory references to be register references. It promotes +alloca instructions which only have loads and stores as uses. An ``alloca`` is +transformed by using dominator frontiers to place phi nodes, then traversing +the function in depth-first order to rewrite loads and stores as appropriate. +This is just the standard SSA construction algorithm to construct "pruned" SSA +form. + +``-memcpyopt``: MemCpy Optimization +----------------------------------- + +This pass performs various transformations related to eliminating ``memcpy`` +calls, or transforming sets of stores into ``memset``\ s. + +``-mergefunc``: Merge Functions +------------------------------- + +This pass looks for equivalent functions that are mergeable and folds them. + +A hash is computed from the function, based on its type and number of basic +blocks. + +Once all hashes are computed, we perform an expensive equality comparison on +each function pair. This takes n^2/2 comparisons per bucket, so it's important +that the hash function be high quality. The equality comparison iterates +through each instruction in each basic block. + +When a match is found the functions are folded. If both functions are +overridable, we move the functionality into a new internal function and leave +two overridable thunks to it. + +``-mergereturn``: Unify function exit nodes +------------------------------------------- + +Ensure that functions have at most one ``ret`` instruction in them. +Additionally, it keeps track of which node is the new exit node of the CFG.
+ +``-partial-inliner``: Partial Inliner +------------------------------------- + +This pass performs partial inlining, typically by inlining an ``if`` statement +that surrounds the body of the function. + +``-prune-eh``: Remove unused exception handling info +---------------------------------------------------- + +This file implements a simple interprocedural pass which walks the call-graph, +turning invoke instructions into call instructions if and only if the callee +cannot throw an exception. It implements this as a bottom-up traversal of the +call-graph. + +``-reassociate``: Reassociate expressions +----------------------------------------- + +This pass reassociates commutative expressions in an order that is designed to +promote better constant propagation, GCSE, :ref:`LICM `, PRE, etc. + +For example: 4 + (x + 5) ⇒ x + (4 + 5) + +In the implementation of this algorithm, constants are assigned rank = 0, +function arguments are rank = 1, and other values are assigned ranks +corresponding to the reverse post order traversal of current function (starting +at 2), which effectively gives values in deep loops higher rank than values not +in loops. + +``-reg2mem``: Demote all values to stack slots +---------------------------------------------- + +This file demotes all registers to memory references. It is intended to be the +inverse of :ref:`mem2reg `. By converting to ``load`` +instructions, the only values live across basic blocks are ``alloca`` +instructions and ``load`` instructions before ``phi`` nodes. It is intended +that this should make CFG hacking much easier. To make later hacking easier, +the entry block is split into two, such that all introduced ``alloca`` +instructions (and nothing else) are in the entry block. + +``-scalarrepl``: Scalar Replacement of Aggregates (DT) +------------------------------------------------------ + +The well-known scalar replacement of aggregates transformation. 
This transform +breaks up ``alloca`` instructions of aggregate type (structure or array) into +individual ``alloca`` instructions for each member if possible. Then, if +possible, it transforms the individual ``alloca`` instructions into nice clean +scalar SSA form. + +This combines a simple scalar replacement of aggregates algorithm with the +:ref:`mem2reg <passes-mem2reg>` algorithm because they often interact, +especially for C++ programs. As such, iterating between ``scalarrepl``, then +:ref:`mem2reg <passes-mem2reg>` until we run out of things to promote works +well. + +.. _passes-sccp: + +``-sccp``: Sparse Conditional Constant Propagation +-------------------------------------------------- + +Sparse conditional constant propagation and merging, which can be summarized +as: + +* Assumes values are constant unless proven otherwise +* Assumes BasicBlocks are dead unless proven otherwise +* Proves values to be constant, and replaces them with constants +* Proves conditional branches to be unconditional + +Note that this pass has a habit of making definitions be dead. It is a good +idea to run a :ref:`DCE <passes-dce>` pass sometime after running this pass. + +``-simplify-libcalls``: Simplify well-known library calls +--------------------------------------------------------- + +Applies a variety of small optimizations for calls to specific well-known +function calls (e.g. runtime library functions). For example, a call +``exit(3)`` that occurs within the ``main()`` function can be transformed into +simply ``return 3``. + +.. _passes-simplifycfg: + +``-simplifycfg``: Simplify the CFG +---------------------------------- + +Performs dead code elimination and basic block merging. Specifically: + +* Removes basic blocks with no predecessors. +* Merges a basic block into its predecessor if there is only one and the + predecessor only has one successor. +* Eliminates PHI nodes for basic blocks with a single predecessor. +* Eliminates a basic block that only contains an unconditional branch.
+ +``-sink``: Code sinking +----------------------- + +This pass moves instructions into successor blocks, when possible, so that they +aren't executed on paths where their results aren't needed. + +``-strip``: Strip all symbols from a module +------------------------------------------- + +Performs code stripping. This transformation can delete: + +* names for virtual registers +* symbols for internal globals and functions +* debug information + +Note that this transformation makes code much less readable, so it should only +be used in situations where the strip utility would be used, such as reducing +code size or making it harder to reverse engineer code. + +``-strip-dead-debug-info``: Strip debug info for unused symbols +--------------------------------------------------------------- + +.. FIXME: this description is the same as for -strip + +Performs code stripping. This transformation can delete: + +* names for virtual registers +* symbols for internal globals and functions +* debug information + +Note that this transformation makes code much less readable, so it should only +be used in situations where the strip utility would be used, such as reducing +code size or making it harder to reverse engineer code. + +``-strip-dead-prototypes``: Strip Unused Function Prototypes +------------------------------------------------------------ + +This pass loops over all of the functions in the input module, looking for dead +declarations and removes them. Dead declarations are declarations of functions +for which no implementation is available (i.e., declarations for unused library +functions). + +``-strip-debug-declare``: Strip all ``llvm.dbg.declare`` intrinsics +------------------------------------------------------------------- + +.. FIXME: this description is the same as for -strip + +This pass implements code stripping. Specifically, it can delete: + +#. names for virtual registers +#. symbols for internal globals and functions +#.
debug information + +Note that this transformation makes code much less readable, so it should only +be used in situations where the 'strip' utility would be used, such as reducing +code size or making it harder to reverse engineer code. + +``-strip-nondebug``: Strip all symbols, except dbg symbols, from a module +------------------------------------------------------------------------- + +.. FIXME: this description is the same as for -strip + +This pass implements code stripping. Specifically, it can delete: + +#. names for virtual registers +#. symbols for internal globals and functions +#. debug information + +Note that this transformation makes code much less readable, so it should only +be used in situations where the 'strip' utility would be used, such as reducing +code size or making it harder to reverse engineer code. + +``-tailcallelim``: Tail Call Elimination +---------------------------------------- + +This file transforms calls of the current function (self recursion) followed by +a return instruction with a branch to the entry of the function, creating a +loop. This pass also implements the following extensions to the basic +algorithm: + +#. Trivial instructions between the call and return do not prevent the + transformation from taking place, though currently the analysis cannot + support moving any really useful instructions (only dead ones). +#. This pass transforms functions that are prevented from being tail recursive + by an associative expression to use an accumulator variable, thus compiling + the typical naive factorial or fib implementation into efficient code. +#. TRE is performed if the function returns void, if the return returns the + result returned by the call, or if the function returns a run-time constant + on all exits from the function. It is possible, though unlikely, that the + return returns something else (like constant 0), and can still be TRE'd. 
It + can be TRE'd if *all other* return instructions in the function return the + exact same value. +#. If it can prove that callees do not access their caller's stack frame, they + are marked as eligible for tail call elimination (by the code generator). + +Utility Passes +============== + +This section describes the LLVM Utility Passes. + +``-deadarghaX0r``: Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE) +------------------------------------------------------------------------ + +Same as dead argument elimination, but deletes arguments to functions which are +external. This is only for use by :doc:`bugpoint <Bugpoint>`. + +``-extract-blocks``: Extract Basic Blocks From Module (for bugpoint use) +------------------------------------------------------------------------ + +This pass is used by bugpoint to extract all blocks from the module into their +own functions. + +``-instnamer``: Assign names to anonymous instructions +------------------------------------------------------ + +This is a little utility pass that gives instructions names; this is mostly +useful when diffing the effect of an optimization because deleting an unnamed +instruction can change all other instruction numbering, making the diff very +noisy. + +``-preverify``: Preliminary module verification +----------------------------------------------- + +Ensures that the module is in the form required by the :ref:`Module Verifier +<passes-verify>` pass. Running the verifier runs this pass automatically, so +there should be no need to use it directly. + +.. _passes-verify: + +``-verify``: Module Verifier +---------------------------- + +Verifies LLVM IR code. This is useful to run after an optimization which is +undergoing testing. Note that llvm-as verifies its input before emitting +bitcode, and also that malformed bitcode is likely to make LLVM crash. All +language front-ends are therefore encouraged to verify their output before +performing optimizing transformations. + +#.
Both of a binary operator's parameters are of the same type. +#. Verify that the indices of mem access instructions match other operands. +#. Verify that arithmetic and other things are only performed on first-class + types. Verify that shifts and logicals only happen on integrals f.e. +#. All of the constants in a switch statement are of the correct type. +#. The code is in valid SSA form. +#. It is illegal to put a label into any other type (like a structure) or to + return one. +#. Only phi nodes can be self referential: ``%x = add i32 %x``, ``%x`` is + invalid. +#. PHI nodes must have an entry for each predecessor, with no extras. +#. PHI nodes must be the first thing in a basic block, all grouped together. +#. PHI nodes must have at least one entry. +#. All basic blocks should only end with terminator insts, not contain them. +#. The entry node to a function must not have predecessors. +#. All Instructions must be embedded into a basic block. +#. Functions cannot take a void-typed parameter. +#. Verify that a function's argument list agrees with its declared type. +#. It is illegal to specify a name for a void value. +#. It is illegal to have an internal global value with no initializer. +#. It is illegal to have a ``ret`` instruction that returns a value that does + not agree with the function return value type. +#. Function call argument types match the function prototype. +#. All other things that are tested by asserts spread about the code. + +Note that this does not provide full security verification (like Java), but +instead just tries to ensure that code is well-formed. + +``-view-cfg``: View CFG of function +----------------------------------- + +Displays the control flow graph using the GraphViz tool. + +``-view-cfg-only``: View CFG of function (with no function bodies) +------------------------------------------------------------------ + +Displays the control flow graph using the GraphViz tool, but omitting function +bodies. 
+ +``-view-dom``: View dominance tree of function +---------------------------------------------- + +Displays the dominator tree using the GraphViz tool. + +``-view-dom-only``: View dominance tree of function (with no function bodies) +----------------------------------------------------------------------------- + +Displays the dominator tree using the GraphViz tool, but omitting function +bodies. + +``-view-postdom``: View postdominance tree of function +------------------------------------------------------ + +Displays the post dominator tree using the GraphViz tool. + +``-view-postdom-only``: View postdominance tree of function (with no function bodies) +------------------------------------------------------------------------------------- + +Displays the post dominator tree using the GraphViz tool, but omitting function +bodies. + diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst index b45449793e0a..efab10cd13a5 100644 --- a/docs/Phabricator.rst +++ b/docs/Phabricator.rst @@ -88,6 +88,12 @@ diffs between different versions of the patch as it was reviewed in the *Revision Update History*. Most features are self descriptive - explore, and if you have a question, drop by on #llvm in IRC to get help. +Note that as e-mail is the system of reference for code reviews, and some +people prefer it over a web interface, we do not generate automated mail +when a review changes state, for example by clicking "Accept Revision" in +the web interface. Thus, please type LGTM into the comment box to accept +a change from Phabricator. + Status ------ diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html deleted file mode 100644 index 7c2e6c8aad92..000000000000 --- a/docs/ProgrammersManual.html +++ /dev/null @@ -1,4156 +0,0 @@ - - - - - LLVM Programmer's Manual - - - - -

- LLVM Programmer's Manual -

- -
    -
  1. Introduction
  2. -
  3. General Information - -
  4. -
  5. Important and useful LLVM APIs - -
  6. -
  7. Picking the Right Data Structure for a Task - -
  8. -
  9. Helpful Hints for Common Operations - -
  10. - -
  11. Threads and LLVM - -
  12. - -
  13. Advanced Topics -
  14. - -
  15. The Core LLVM Class Hierarchy Reference - -
  16. -
- - - - -

- Introduction -

- - -
- -

This document is meant to highlight some of the important classes and -interfaces available in the LLVM source-base. This manual is not -intended to explain what LLVM is, how it works, and what LLVM code looks -like. It assumes that you know the basics of LLVM and are interested -in writing transformations or otherwise analyzing or manipulating the -code.

- -

This document should get you oriented so that you can find your -way in the continuously growing source code that makes up the LLVM -infrastructure. Note that this manual is not intended to serve as a -replacement for reading the source code, so if you think there should be -a method in one of these classes to do something, but it's not listed, -check the source. Links to the doxygen sources -are provided to make this as easy as possible.

- -

The first section of this document describes general information that is -useful to know when working in the LLVM infrastructure, and the second describes -the Core LLVM classes. In the future this manual will be extended with -information describing how to use extension libraries, such as dominator -information, CFG traversal routines, and useful utilities like the InstVisitor template.

- -
- - -

- General Information -

- - -
- -

This section contains general information that is useful if you are working -in the LLVM source-base, but that isn't specific to any particular API.

- - -

- The C++ Standard Template Library -

- -
- -

LLVM makes heavy use of the C++ Standard Template Library (STL), -perhaps much more than you are used to, or have seen before. Because of -this, you might want to do a little background reading in the -techniques used and capabilities of the library. There are many good -pages that discuss the STL, and several books on the subject that you -can get, so it will not be discussed in this document.

- -

Here are some useful links:

- -
    - -
  1. Dinkumware -C++ Library reference - an excellent reference for the STL and other parts -of the standard C++ library.
  2. - -
  3. C++ In a Nutshell - This is an -O'Reilly book in the making. It has a decent Standard Library -Reference that rivals Dinkumware's, and is unfortunately no longer free since the -book has been published.
  4. - -
  5. C++ Frequently Asked -Questions
  6. - -
  7. SGI's STL Programmer's Guide - -Contains a useful Introduction to the -STL.
  8. - -
  9. Bjarne Stroustrup's C++ -Page
  10. - -
  11. -Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0 (even better, get -the book).
  12. - -
- -

You are also encouraged to take a look at the LLVM Coding Standards guide which focuses on how -to write maintainable code more than where to put your curly braces.

- -
- - -

- Other useful references -

- - - -
- - -

- Important and useful LLVM APIs -

- - -
- -

Here we highlight some LLVM APIs that are generally useful and good to -know about when writing transformations.

- - -

- The isa<>, cast<> and - dyn_cast<> templates -

- -
- -

The LLVM source-base makes extensive use of a custom form of RTTI. -These templates have many similarities to the C++ dynamic_cast<> -operator, but they don't have some drawbacks (primarily stemming from -the fact that dynamic_cast<> only works on classes that -have a v-table). Because they are used so often, you must know what they -do and how they work. All of these templates are defined in the llvm/Support/Casting.h -file (note that you very rarely have to include this file directly).

- -
-
isa<>:
- -

The isa<> operator works exactly like the Java - "instanceof" operator. It returns true or false depending on whether - a reference or pointer points to an instance of the specified class. This can - be very useful for constraint checking of various sorts (example below).

-
- -
cast<>:
- -

The cast<> operator is a "checked cast" operation. It - converts a pointer or reference from a base class to a derived class, causing - an assertion failure if it is not really an instance of the right type. This - should be used in cases where you have some information that makes you believe - that something is of the right type. An example of the isa<> - and cast<> template is:

- -
-
-static bool isLoopInvariant(const Value *V, const Loop *L) {
-  if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V))
-    return true;
-
-  // Otherwise, it must be an instruction...
-  return !L->contains(cast<Instruction>(V)->getParent());
-}
-
-
- -

Note that you should not use an isa<> test followed - by a cast<>, for that use the dyn_cast<> - operator.

- -
- -
dyn_cast<>:
- -

The dyn_cast<> operator is a "checking cast" operation. - It checks to see if the operand is of the specified type, and if so, returns a - pointer to it (this operator does not work with references). If the operand is - not of the correct type, a null pointer is returned. Thus, this works very - much like the dynamic_cast<> operator in C++, and should be - used in the same circumstances. Typically, the dyn_cast<> - operator is used in an if statement or some other flow control - statement like this:

- -
-
-if (AllocationInst *AI = dyn_cast<AllocationInst>(Val)) {
-  // ...
-}
-
-
- -

This form of the if statement effectively combines together a call - to isa<> and a call to cast<> into one - statement, which is very convenient.

- -

Note that the dyn_cast<> operator, like C++'s - dynamic_cast<> or Java's instanceof operator, can be - abused. In particular, you should not use big chained if/then/else - blocks to check for lots of different variants of classes. If you find - yourself wanting to do this, it is much cleaner and more efficient to use the - InstVisitor class to dispatch over the instruction type directly.

- -
- -
cast_or_null<>:
- -

The cast_or_null<> operator works just like the - cast<> operator, except that it allows for a null pointer as an - argument (which it then propagates). This can sometimes be useful, allowing - you to combine several null checks into one.

- -
dyn_cast_or_null<>:
- -

The dyn_cast_or_null<> operator works just like the - dyn_cast<> operator, except that it allows for a null pointer - as an argument (which it then propagates). This can sometimes be useful, - allowing you to combine several null checks into one.

- -
- -

These five templates can be used with any classes, whether they have a -v-table or not. If you want to add support for these templates, see the -document How to set up LLVM-style -RTTI for your class hierarchy . -

- -
- - - -

- Passing strings (the StringRef -and Twine classes) -

- -
- -

Although LLVM generally does not do much string manipulation, we do have -several important APIs which take strings. Two important examples are the -Value class -- which has names for instructions, functions, etc. -- and the -StringMap class which is used extensively in LLVM and Clang.

- -

These are generic classes, and they need to be able to accept strings which -may have embedded null characters. Therefore, they cannot simply take -a const char *, and taking a const std::string& requires -clients to perform a heap allocation which is usually unnecessary. Instead, -many LLVM APIs use a StringRef or a const Twine& for -passing strings efficiently.

- - -

- The StringRef class -

- -
- -

The StringRef data type represents a reference to a constant string -(a character array and a length) and supports the common operations available -on std::string, but does not require heap allocation.

- -

It can be implicitly constructed using a C style null-terminated string, -an std::string, or explicitly with a character pointer and length. -For example, the StringMap find function is declared as:

- -
-  iterator find(StringRef Key);
-
- -

and clients can call it using any one of:

- -
-  Map.find("foo");                 // Lookup "foo"
-  Map.find(std::string("bar"));    // Lookup "bar"
-  Map.find(StringRef("\0baz", 4)); // Lookup "\0baz"
-
- -

Similarly, APIs which need to return a string may return a StringRef -instance, which can be used directly or converted to an std::string -using the str member function. See -"llvm/ADT/StringRef.h" -for more information.

- -

You should rarely use the StringRef class directly: because it contains -pointers to external memory, it is not generally safe to store an instance of the -class (unless you know that the external storage will not be freed). StringRef is -small and pervasive enough in LLVM that it should always be passed by value.

- -
- - -

- The Twine class -

- -
- -

The Twine class is an -efficient way for APIs to accept concatenated strings. For example, a common -LLVM paradigm is to name one instruction based on -the name of another instruction with a suffix, for example:

- -
-
-    New = CmpInst::Create(..., SO->getName() + ".cmp");
-
-
- -

The Twine class is effectively a lightweight -rope -which points to temporary (stack allocated) objects. Twines can be implicitly -constructed as the result of the plus operator applied to strings (i.e., a C -string, an std::string, or a StringRef). The twine delays -the actual concatenation of strings until it is actually required, at which -point it can be efficiently rendered directly into a character array. This -avoids unnecessary heap allocation involved in constructing the temporary -results of string concatenation. See -"llvm/ADT/Twine.h" -and here for more information.

- -

As with a StringRef, Twine objects point to external memory -and should almost never be stored or mentioned directly. They are intended -solely for use when defining a function which should be able to efficiently -accept concatenated strings.

- -
- -
- - -

- The DEBUG() macro and -debug option -

- -
- -

Often when working on your pass you will put a bunch of debugging printouts -and other code into your pass. After you get it working, you want to remove -it, but you may need it again in the future (to work out new bugs that you run -across).

- -

Naturally, because of this, you don't want to delete the debug printouts, -but you don't want them to always be noisy. A standard compromise is to comment -them out, allowing you to enable them if you need them in the future.

- -

The "llvm/Support/Debug.h" -file provides a macro named DEBUG() that is a much nicer solution to -this problem. Basically, you can put arbitrary code into the argument of the -DEBUG macro, and it is only executed if 'opt' (or any other -tool) is run with the '-debug' command line argument:

- -
-
-DEBUG(errs() << "I am here!\n");
-
-
- -

Then you can run your pass like this:

- -
-
-$ opt < a.bc > /dev/null -mypass
-<no output>
-$ opt < a.bc > /dev/null -mypass -debug
-I am here!
-
-
- -

Using the DEBUG() macro instead of a home-brewed solution allows you -to not have to create "yet another" command line option for the debug output for -your pass. Note that DEBUG() macros are disabled for optimized builds, -so they do not cause a performance impact at all (for the same reason, they -should also not contain side-effects!).

- -

One additional nice thing about the DEBUG() macro is that you can -enable or disable it directly in gdb. Just use "set DebugFlag=0" or -"set DebugFlag=1" from the gdb if the program is running. If the -program hasn't been started yet, you can always just run it with --debug.

- - -

- Fine grained debug info with DEBUG_TYPE and - the -debug-only option -

- -
- -

Sometimes you may find yourself in a situation where enabling -debug -just turns on too much information (such as when working on the code -generator). If you want to enable debug information with more fine-grained -control, you define the DEBUG_TYPE macro and the -debug-only -option as follows:

- -
-
-#undef  DEBUG_TYPE
-DEBUG(errs() << "No debug type\n");
-#define DEBUG_TYPE "foo"
-DEBUG(errs() << "'foo' debug type\n");
-#undef  DEBUG_TYPE
-#define DEBUG_TYPE "bar"
-DEBUG(errs() << "'bar' debug type\n");
-#undef  DEBUG_TYPE
-#define DEBUG_TYPE ""
-DEBUG(errs() << "No debug type (2)\n");
-
-
- -

Then you can run your pass like this:

- -
-
-$ opt < a.bc > /dev/null -mypass
-<no output>
-$ opt < a.bc > /dev/null -mypass -debug
-No debug type
-'foo' debug type
-'bar' debug type
-No debug type (2)
-$ opt < a.bc > /dev/null -mypass -debug-only=foo
-'foo' debug type
-$ opt < a.bc > /dev/null -mypass -debug-only=bar
-'bar' debug type
-
-
- -

Of course, in practice, you should only set DEBUG_TYPE at the top of -a file, to specify the debug type for the entire module (if you do this before -you #include "llvm/Support/Debug.h", you don't have to insert the ugly -#undef's). Also, you should use names more meaningful than "foo" and -"bar", because there is no system in place to ensure that names do not -conflict. If two different modules use the same string, they will all be turned -on when the name is specified. This allows, for example, all debug information -for instruction scheduling to be enabled with -debug-only=InstrSched, -even if the source lives in multiple files.

- -

The DEBUG_WITH_TYPE macro is also available for situations where you -would like to set DEBUG_TYPE, but only for one specific DEBUG -statement. It takes an additional first parameter, which is the type to use. For -example, the preceding example could be written as:

- - -
-
-DEBUG_WITH_TYPE("", errs() << "No debug type\n");
-DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n");
-DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n");
-DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n");
-
-
- -
- -
- - -

- The Statistic class & -stats - option -

- -
- -

The "llvm/ADT/Statistic.h" file -provides a class named Statistic that is used as a unified way to -keep track of what the LLVM compiler is doing and how effective various -optimizations are. It is useful to see what optimizations are contributing to -making a particular program run faster.

- -

Often you may run your pass on some big program, and you're interested to see -how many times it makes a certain transformation. Although you can do this with -hand inspection, or some ad-hoc method, this is a real pain and not very useful -for big programs. Using the Statistic class makes it very easy to -keep track of this information, and the calculated information is presented in a -uniform manner with the rest of the passes being executed.

- -

There are many examples of Statistic uses, but the basics of using -it are as follows:

- -
    -
  1. Define your statistic like this:

    - -
    -
    -#define DEBUG_TYPE "mypassname"   // This goes before any #includes.
    -STATISTIC(NumXForms, "The # of times I did stuff");
    -
    -
    - -

    The STATISTIC macro defines a static variable, whose name is - specified by the first argument. The pass name is taken from the DEBUG_TYPE - macro, and the description is taken from the second argument. The variable - defined ("NumXForms" in this case) acts like an unsigned integer.

  2. - -
  3. Whenever you make a transformation, bump the counter:

    - -
    -
    -++NumXForms;   // I did stuff!
    -
    -
    - -
  4. -
- -

That's all you have to do. To get 'opt' to print out the - statistics gathered, use the '-stats' option:

- -
-
-$ opt -stats -mypassname < program.bc > /dev/null
-... statistics output ...
-
-
- -

When running opt on a C file from the SPEC benchmark -suite, it gives a report that looks like this:

- -
-
-   7646 bitcodewriter   - Number of normal instructions
-    725 bitcodewriter   - Number of oversized instructions
- 129996 bitcodewriter   - Number of bitcode bytes written
-   2817 raise           - Number of insts DCEd or constprop'd
-   3213 raise           - Number of cast-of-self removed
-   5046 raise           - Number of expression trees converted
-     75 raise           - Number of other getelementptr's formed
-    138 raise           - Number of load/store peepholes
-     42 deadtypeelim    - Number of unused typenames removed from symtab
-    392 funcresolve     - Number of varargs functions resolved
-     27 globaldce       - Number of global variables removed
-      2 adce            - Number of basic blocks removed
-    134 cee             - Number of branches revectored
-     49 cee             - Number of setcc instruction eliminated
-    532 gcse            - Number of loads removed
-   2919 gcse            - Number of instructions removed
-     86 indvars         - Number of canonical indvars added
-     87 indvars         - Number of aux indvars removed
-     25 instcombine     - Number of dead inst eliminate
-    434 instcombine     - Number of insts combined
-    248 licm            - Number of load insts hoisted
-   1298 licm            - Number of insts hoisted to a loop pre-header
-      3 licm            - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
-     75 mem2reg         - Number of alloca's promoted
-   1444 cfgsimplify     - Number of blocks simplified
-
-
- -

Obviously, with so many optimizations, having a unified framework for this -stuff is very nice. Making your pass fit well into the framework makes it more -maintainable and useful.

- -
- - -

- Viewing graphs while debugging code -

- -
- -

Several of the important data structures in LLVM are graphs: for example -CFGs made out of LLVM BasicBlocks, CFGs made out of -LLVM MachineBasicBlocks, and -Instruction Selection -DAGs. In many cases, while debugging various parts of the compiler, it is -nice to instantly visualize these graphs.

- -

LLVM provides several callbacks that are available in a debug build to do -exactly that. If you call the Function::viewCFG() method, for example, -the current LLVM tool will pop up a window containing the CFG for the function -where each basic block is a node in the graph, and each node contains the -instructions in the block. Similarly, there also exists -Function::viewCFGOnly() (does not include the instructions), the -MachineFunction::viewCFG() and MachineFunction::viewCFGOnly(), -and the SelectionDAG::viewGraph() methods. Within GDB, for example, -you can usually use something like call DAG.viewGraph() to pop -up a window. Alternatively, you can sprinkle calls to these functions in your -code in places you want to debug.

- -

Getting this to work requires a small amount of configuration. On Unix -systems with X11, install the graphviz -toolkit, and make sure 'dot' and 'gv' are in your path. If you are running on -Mac OS/X, download and install the Mac OS/X Graphviz program, and add -/Applications/Graphviz.app/Contents/MacOS/ (or wherever you install -it) to your path. Once your system and path are set up, rerun the LLVM -configure script and rebuild LLVM to enable this functionality.

- -

SelectionDAG has been extended to make it easier to locate -interesting nodes in large complex graphs. From gdb, if you -call DAG.setGraphColor(node, "color"), then the -next call DAG.viewGraph() would highlight the node in the -specified color (choices of colors can be found at colors.) More -complex node attributes can be provided with call -DAG.setGraphAttrs(node, "attributes") (choices can be -found at Graph -Attributes.) If you want to restart and clear all the current graph -attributes, then you can call DAG.clearGraphAttrs().

- -

Note that graph visualization features are compiled out of Release builds -to reduce file size. This means that you need a Debug+Asserts or -Release+Asserts build to use these features.

- -
- -
- - -

- Picking the Right Data Structure for a Task -

- - -
- -

LLVM has a plethora of data structures in the llvm/ADT/ directory, - and we commonly use STL data structures. This section describes the trade-offs - you should consider when you pick one.

- -

-The first step is a choose your own adventure: do you want a sequential -container, a set-like container, or a map-like container? The most important -thing when choosing a container is the algorithmic properties of how you plan to -access the container. Based on that, you should use:

- -
    -
  • a map-like container if you need efficient look-up - of a value based on another value. Map-like containers also support - efficient queries for containment (whether a key is in the map). Map-like - containers generally do not support efficient reverse mapping (values to - keys). If you need that, use two maps. Some map-like containers also - support efficient iteration through the keys in sorted order. Map-like - containers are the most expensive sort; only use them if you need one of - these capabilities.
  • - -
  • a set-like container if you need to put a bunch of - stuff into a container that automatically eliminates duplicates. Some - set-like containers support efficient iteration through the elements in - sorted order. Set-like containers are more expensive than sequential - containers. -
  • - -
  • a sequential container provides - the most efficient way to add elements and keeps track of the order they are - added to the collection. They permit duplicates and support efficient - iteration, but do not support efficient look-up based on a key. -
  • - -
  • a string container is a specialized sequential - container or reference structure that is used for character or byte - arrays.
  • - -
  • a bit container provides an efficient way to store and - perform set operations on sets of numeric id's, while automatically - eliminating duplicates. Bit containers require a maximum of 1 bit for each - identifier you want to store. -
  • -
- -

-Once the proper category of container is determined, you can fine tune the -memory use, constant factors, and cache behaviors of access by intelligently -picking a member of the category. Note that constant factors and cache behavior -can be a big deal. If you have a vector that usually only contains a few -elements (but could contain many), for example, it's much better to use -SmallVector than vector -. Doing so avoids (relatively) expensive malloc/free calls, which dwarf the -cost of adding the elements to the container.

- - -

- Sequential Containers (std::vector, std::list, etc) -

- -
-There are a variety of sequential containers available for you, based on your -needs. Pick the first in this section that will do what you want. - - -

- llvm/ADT/ArrayRef.h -

- -
-

The llvm::ArrayRef class is the preferred class to use in an interface that - accepts a sequential list of elements in memory and just reads from them. By - taking an ArrayRef, the API can be passed a fixed size array, an std::vector, - an llvm::SmallVector and anything else that is contiguous in memory. -

-
- - - - -

- Fixed Size Arrays -

- -
-

Fixed size arrays are very simple and very fast. They are good if you know -exactly how many elements you have, or you have a (low) upper bound on how many -you have.

-
- - -

- Heap Allocated Arrays -

- -
-

Heap allocated arrays (new[] + delete[]) are also simple. They are good if -the number of elements is variable, if you know how many elements you will need -before the array is allocated, and if the array is usually large (if not, -consider a SmallVector). The cost of a heap -allocated array is the cost of the new/delete (aka malloc/free). Also note that -if you are allocating an array of a type with a constructor, the constructor and -destructors will be run for every element in the array (re-sizable vectors only -construct those elements actually used).

-
- - -

- "llvm/ADT/TinyPtrVector.h" -

- - -
-

TinyPtrVector<Type> is a highly specialized collection class -that is optimized to avoid allocation in the case when a vector has zero or one -elements. It has two major restrictions: 1) it can only hold values of pointer -type, and 2) it cannot hold a null pointer.

- -

Since this container is highly specialized, it is rarely used.

- -
- - -

- "llvm/ADT/SmallVector.h" -

- -
-

SmallVector<Type, N> is a simple class that looks and smells -just like vector<Type>: -it supports efficient iteration, lays out elements in memory order (so you can -do pointer arithmetic between elements), supports efficient push_back/pop_back -operations, supports efficient random access to its elements, etc.

- -

The advantage of SmallVector is that it allocates space for -some number of elements (N) in the object itself. Because of this, if -the SmallVector is dynamically smaller than N, no malloc is performed. This can -be a big win in cases where the malloc/free call is far more expensive than the -code that fiddles around with the elements.

- -

This is good for vectors that are "usually small" (e.g. the number of -predecessors/successors of a block is usually less than 8). On the other hand, -this makes the size of the SmallVector itself large, so you don't want to -allocate lots of them (doing so will waste a lot of space). As such, -SmallVectors are most useful when on the stack.

- -

SmallVector also provides a nice portable and efficient replacement for -alloca.

- -
- - -

- <vector> -

- -
-

-std::vector is well loved and respected. It is useful when SmallVector isn't: -when the size of the vector is often large (thus the small optimization will -rarely be a benefit) or if you will be allocating many instances of the vector -itself (which would waste space for elements that aren't in the container). -vector is also useful when interfacing with code that expects vectors :). -

- -

One worthwhile note about std::vector: avoid code like this:

- -
-
-for ( ... ) {
-   std::vector<foo> V;
-   // make use of V.
-}
-
-
- -

Instead, write this as:

- -
-
-std::vector<foo> V;
-for ( ... ) {
-   // make use of V.
-   V.clear();
-}
-
-
- -

Doing so will save (at least) one heap allocation and free per iteration of -the loop.

- -
- - -

- <deque> -

- -
-

std::deque is, in some senses, a generalized version of std::vector. Like -std::vector, it provides constant time random access and other similar -properties, but it also provides efficient access to the front of the list. It -does not guarantee continuity of elements within memory.

- -

In exchange for this extra flexibility, std::deque has significantly higher -constant factor costs than std::vector. If possible, use std::vector or -something cheaper.

-
- - -

- <list> -

- -
-

std::list is an extremely inefficient class that is rarely useful. -It performs a heap allocation for every element inserted into it, thus having an -extremely high constant factor, particularly for small data types. std::list -also only supports bidirectional iteration, not random access iteration.

- -

In exchange for this high cost, std::list supports efficient access to both -ends of the list (like std::deque, but unlike std::vector or SmallVector). In -addition, the iterator invalidation characteristics of std::list are stronger -than those of a vector class: inserting or removing an element into the list does -not invalidate iterators or pointers to other elements in the list.

-
- - -

- llvm/ADT/ilist.h -

- -
-

ilist<T> implements an 'intrusive' doubly-linked list. It is -intrusive, because it requires the element to store and provide access to the -prev/next pointers for the list.

- -

ilist has the same drawbacks as std::list, and additionally -requires an ilist_traits implementation for the element type, but it -provides some novel characteristics. In particular, it can efficiently store -polymorphic objects, the traits class is informed when an element is inserted or -removed from the list, and ilists are guaranteed to support a -constant-time splice operation.

- -

These properties are exactly what we want for things like -Instructions and basic blocks, which is why these are implemented with -ilists.

- -Related classes of interest are explained in the following subsections: - -
- - -

- llvm/ADT/PackedVector.h -

- -
-

-Useful for storing a vector of values using only a small number of bits for each -value. Apart from the standard operations of a vector-like container, it can -also perform an 'or' set operation. -

- -

For example:

- -
-
-enum State {
-    None = 0x0,
-    FirstCondition = 0x1,
-    SecondCondition = 0x2,
-    Both = 0x3
-};
-
-State get() {
-    PackedVector<State, 2> Vec1;
-    Vec1.push_back(FirstCondition);
-
-    PackedVector<State, 2> Vec2;
-    Vec2.push_back(SecondCondition);
-
-    Vec1 |= Vec2;
-    return Vec1[0]; // returns 'Both'.
-}
-
-
- -
- - -

- ilist_traits -

- -
-

ilist_traits<T> is ilist<T>'s customization -mechanism. iplist<T> (and consequently ilist<T>) -publicly derive from this traits class.

-
- - -

- iplist -

- -
-

iplist<T> is ilist<T>'s base and as such -supports a slightly narrower interface. Notably, inserters from -T& are absent.

- -

ilist_traits<T> is a public base of this class and can be -used for a wide variety of customizations.

-
- - -

- llvm/ADT/ilist_node.h -

- -
-

ilist_node<T> implements the forward and backward links -that are expected by the ilist<T> (and analogous containers) -in the default manner.

- -

ilist_node<T>s are meant to be embedded in the node type -T, usually T publicly derives from -ilist_node<T>.

-
- - -

- Sentinels -

- -
-

ilists have another specialty that must be considered. To be a good -citizen in the C++ ecosystem, it needs to support the standard container -operations, such as begin and end iterators, etc. Also, the -operator-- must work correctly on the end iterator in the -case of non-empty ilists.

- -

The only sensible solution to this problem is to allocate a so-called -sentinel along with the intrusive list, which serves as the end -iterator, providing the back-link to the last element. However, conforming to the -C++ convention, it is illegal to apply operator++ beyond the sentinel, and it -also must not be dereferenced.

- -

These constraints allow the ilist some implementation freedom -in how to allocate and store the sentinel. The corresponding policy is dictated -by ilist_traits<T>. By default a T gets heap-allocated -whenever the need for a sentinel arises.

- -

While the default policy is sufficient in most cases, it may break down when -T does not provide a default constructor. Also, in the case of many -instances of ilists, the memory overhead of the associated sentinels -is wasted. To alleviate the situation with numerous and voluminous -T-sentinels, sometimes a trick is employed, leading to ghostly -sentinels.

- -

Ghostly sentinels are obtained by specially-crafted ilist_traits<T> -which superpose the sentinel with the ilist instance in memory. Pointer -arithmetic is used to obtain the sentinel, which is relative to the -ilist's this pointer. The ilist is augmented by an -extra pointer, which serves as the back-link of the sentinel. This is the only -field in the ghostly sentinel which can be legally accessed.

-
- - -

- Other Sequential Container options -

- -
-

Other STL containers are available, such as std::string.

- -

There are also various STL adapter classes such as std::queue, -std::priority_queue, std::stack, etc. These provide simplified access to an -underlying container but don't affect the cost of the container itself.

- -
-
- - -

- String-like containers -

- -
- -

-There are a variety of ways to pass around and use strings in C and C++, and -LLVM adds a few new options to choose from. Pick the first option on this list -that will do what you need, they are ordered according to their relative cost. -

-

-Note that it is generally preferred to not pass strings around as -"const char*"'s. These have a number of problems, including the fact -that they cannot represent embedded nul ("\0") characters, and do not have a -length available efficiently. The general replacement for 'const -char*' is StringRef. -

- -

For more information on choosing string containers for APIs, please see -Passing strings.

- - - -

- llvm/ADT/StringRef.h -

- -
-

-The StringRef class is a simple value class that contains a pointer to a -character and a length, and is quite related to the ArrayRef class (but specialized for arrays of -characters). Because StringRef carries a length with it, it safely handles -strings with embedded nul characters in it, getting the length does not require -a strlen call, and it even has very convenient APIs for slicing and dicing the -character range that it represents. -

- -

-StringRef is ideal for passing simple strings around that are known to be live, -either because they are C string literals, std::string, a C array, or a -SmallVector. Each of these cases has an efficient implicit conversion to -StringRef, which doesn't result in a dynamic strlen being executed. -

- -

StringRef has a few major limitations which make more powerful string -containers useful:

- -
    -
  1. You cannot directly convert a StringRef to a 'const char*' because there is -no way to add a trailing nul (unlike the .c_str() method on various stronger -classes).
  2. - - -
  3. StringRef doesn't own or keep alive the underlying string bytes. -As such it can easily lead to dangling pointers, and is not suitable for -embedding in datastructures in most cases (instead, use an std::string or -something like that).
  4. - -
  5. For the same reason, StringRef cannot be used as the return value of a -method if the method "computes" the result string. Instead, use -std::string.
  6. - -
  7. StringRef's do not allow you to mutate the pointed-to string bytes and it -doesn't allow you to insert or remove bytes from the range. For editing -operations like this, it interoperates with the Twine class.
  8. -
- -

Because of its strengths and limitations, it is very common for a function to -take a StringRef and for a method on an object to return a StringRef that -points into some string that it owns.

- -
- - -

- llvm/ADT/Twine.h -

- -
-

- The Twine class is used as an intermediary datatype for APIs that want to take - a string that can be constructed inline with a series of concatenations. - Twine works by forming recursive instances of the Twine datatype (a simple - value object) on the stack as temporary objects, linking them together into a - tree which is then linearized when the Twine is consumed. Twine is only safe - to use as the argument to a function, and should always be a const reference, - e.g.: -

- -
-    void foo(const Twine &T);
-    ...
-    StringRef X = ...
-    unsigned i = ...
-    foo(X + "." + Twine(i));
-  
- -

This example forms a string like "blarg.42" by concatenating the values - together, and does not form intermediate strings containing "blarg" or - "blarg.". -

- -

Because Twine is constructed with temporary objects on the stack, and - because these instances are destroyed at the end of the current statement, - it is an inherently dangerous API. For example, this simple variant contains - undefined behavior and will probably crash:

- -
-    void foo(const Twine &T);
-    ...
-    StringRef X = ...
-    unsigned i = ...
-    const Twine &Tmp = X + "." + Twine(i);
-    foo(Tmp);
-  
- -

... because the temporaries are destroyed before the call. That said, - Twine's are much more efficient than intermediate std::string temporaries, and - they work really well with StringRef. Just be aware of their limitations.

- -
- - - -

- llvm/ADT/SmallString.h -

- -
- -

SmallString is a subclass of SmallVector that -adds some convenience APIs like += that takes StringRef's. SmallString avoids -allocating memory in the case when the preallocated space is enough to hold its -data, and it falls back to general heap allocation when required. Since it owns -its data, it is very safe to use and supports full mutation of the string.

- -

Like SmallVector's, the big downside to SmallString is their sizeof. While -they are optimized for small strings, they themselves are not particularly -small. This means that they work great for temporary scratch buffers on the -stack, but should not generally be put into the heap: it is very rare to -see a SmallString as the member of a frequently-allocated heap data structure -or returned by-value. -

- -
- - -

- std::string -

- -
- -

The standard C++ std::string class is a very general class that (like - SmallString) owns its underlying data. sizeof(std::string) is very reasonable - so it can be embedded into heap data structures and returned by-value. - On the other hand, std::string is highly inefficient for inline editing (e.g. - concatenating a bunch of stuff together) and because it is provided by the - standard library, its performance characteristics depend a lot on the host - standard library (e.g. libc++ and MSVC provide a highly optimized string - class, GCC contains a really slow implementation). -

- -

The major disadvantage of std::string is that almost every operation that - makes them larger can allocate memory, which is slow. As such, it is better - to use SmallVector or Twine as a scratch buffer, but then use std::string to - persist the result.

- - -
- - -
- - - -

- Set-Like Containers (std::set, SmallSet, SetVector, etc) -

- -
- -

Set-like containers are useful when you need to canonicalize multiple values -into a single representation. There are several different choices for how to do -this, providing various trade-offs.

- - -

- A sorted 'vector' -

- -
- -

If you intend to insert a lot of elements, then do a lot of queries, a -great approach is to use a vector (or other sequential container) with -std::sort+std::unique to remove duplicates. This approach works really well if -your usage pattern has these two distinct phases (insert then query), and can be -coupled with a good choice of sequential container. -

- -

-This combination provides several nice properties: the result data is -contiguous in memory (good for cache locality), has few allocations, is easy to -address (iterators in the final vector are just indices or pointers), and can be -efficiently queried with a standard binary or radix search.

- -
- - -

- "llvm/ADT/SmallSet.h" -

- -
- -

If you have a set-like data structure that is usually small and whose elements -are reasonably small, a SmallSet<Type, N> is a good choice. This set -has space for N elements in place (thus, if the set is dynamically smaller than -N, no malloc traffic is required) and accesses them with a simple linear search. -When the set grows beyond 'N' elements, it allocates a more expensive representation that -guarantees efficient access (for most types, it falls back to std::set, but for -pointers it uses something far better, SmallPtrSet).

- -

The magic of this class is that it handles small sets extremely efficiently, -but gracefully handles extremely large sets without loss of efficiency. The -drawback is that the interface is quite small: it supports insertion, queries -and erasing, but does not support iteration.

- -
- - -

- "llvm/ADT/SmallPtrSet.h" -

- -
- -

SmallPtrSet has all the advantages of SmallSet (and a SmallSet of pointers is -transparently implemented with a SmallPtrSet), but also supports iterators. If -more than 'N' insertions are performed, a single quadratically -probed hash table is allocated and grows as needed, providing extremely -efficient access (constant time insertion/deleting/queries with low constant -factors) and is very stingy with malloc traffic.

- -

Note that, unlike std::set, the iterators of SmallPtrSet are invalidated -whenever an insertion occurs. Also, the values visited by the iterators are not -visited in sorted order.

- -
- - -

- "llvm/ADT/DenseSet.h" -

- -
- -

-DenseSet is a simple quadratically probed hash table. It excels at supporting -small values: it uses a single allocation to hold all of the pairs that -are currently inserted in the set. DenseSet is a great way to unique small -values that are not simple pointers (use SmallPtrSet for pointers). Note that DenseSet has -the same requirements for the value type that DenseMap has. -

- -
- - -

- "llvm/ADT/SparseSet.h" -

- -
- -

SparseSet holds a small number of objects identified by unsigned keys of -moderate size. It uses a lot of memory, but provides operations that are -almost as fast as a vector. Typical keys are physical registers, virtual -registers, or numbered basic blocks.

- -

SparseSet is useful for algorithms that need very fast clear/find/insert/erase -and fast iteration over small sets. It is not intended for building composite -data structures.

- -
- - -

- "llvm/ADT/FoldingSet.h" -

- -
- -

-FoldingSet is an aggregate class that is really good at uniquing -expensive-to-create or polymorphic objects. It is a combination of a chained -hash table with intrusive links (uniqued objects are required to inherit from -FoldingSetNode) that uses SmallVector as part of -its ID process.

- -

Consider a case where you want to implement a "getOrCreateFoo" method for -a complex object (for example, a node in the code generator). The client has a -description of *what* it wants to generate (it knows the opcode and all the -operands), but we don't want to 'new' a node, then try inserting it into a set -only to find out it already exists, at which point we would have to delete it -and return the node that already exists. -

- -

To support this style of client, FoldingSet performs a query with a -FoldingSetNodeID (which wraps SmallVector) that can be used to describe the -element that we want to query for. The query either returns the element -matching the ID or it returns an opaque ID that indicates where insertion should -take place. Construction of the ID usually does not require heap traffic.

- -

Because FoldingSet uses intrusive links, it can support polymorphic objects -in the set (for example, you can have SDNode instances mixed with LoadSDNodes). -Because the elements are individually allocated, pointers to the elements are -stable: inserting or removing elements does not invalidate any pointers to other -elements. -

- -
- - -

- <set> -

- -
- -

std::set is a reasonable all-around set class, which is decent at -many things but great at nothing. std::set allocates memory for each element -inserted (thus it is very malloc intensive) and typically stores three pointers -per element in the set (thus adding a large amount of per-element space -overhead). It offers guaranteed log(n) performance, which is not particularly -fast from a complexity standpoint (particularly if the elements of the set are -expensive to compare, like strings), and has extremely high constant factors for -lookup, insertion and removal.

- -

The advantages of std::set are that its iterators are stable (deleting or -inserting an element from the set does not affect iterators or pointers to other -elements) and that iteration over the set is guaranteed to be in sorted order. -If the elements in the set are large, then the relative overhead of the pointers -and malloc traffic is not a big deal, but if the elements of the set are small, -std::set is almost never a good choice.

- -
- - -

- "llvm/ADT/SetVector.h" -

- -
-

LLVM's SetVector<Type> is an adapter class that combines your choice of -a set-like container along with a Sequential -Container. The important property -that this provides is efficient insertion with uniquing (duplicate elements are -ignored) with iteration support. It implements this by inserting elements into -both a set-like container and the sequential container, using the set-like -container for uniquing and the sequential container for iteration. -

- -

The difference between SetVector and other sets is that the order of -iteration is guaranteed to match the order of insertion into the SetVector. -This property is really important for things like sets of pointers. Because -pointer values are non-deterministic (e.g. vary across runs of the program on -different machines), iterating over the pointers in the set will -not be in a well-defined order.

- -

-The drawback of SetVector is that it requires twice as much space as a normal -set and has the sum of constant factors from the set-like container and the -sequential container that it uses. Use it *only* if you need to iterate over -the elements in a deterministic order. SetVector is also expensive to delete -elements out of (linear time), unless you use its "pop_back" method, which is -faster. -

- -

SetVector is an adapter class that defaults to - using std::vector and a size 16 SmallSet for the underlying - containers, so it is quite expensive. However, - "llvm/ADT/SetVector.h" also provides a SmallSetVector - class, which defaults to using a SmallVector and SmallSet - of a specified size. If you use this, and if your sets are dynamically - smaller than N, you will save a lot of heap traffic.

- -
- - -

- "llvm/ADT/UniqueVector.h" -

- -
- -

-UniqueVector is similar to SetVector, but it -retains a unique ID for each element inserted into the set. It internally -contains a map and a vector, and it assigns a unique ID for each value inserted -into the set.

- -

UniqueVector is very expensive: its cost is the sum of the cost of -maintaining both the map and vector, it has high complexity, high constant -factors, and produces a lot of malloc traffic. It should be avoided.

- -
- - -

- "llvm/ADT/ImmutableSet.h" -

- -
- -

-ImmutableSet is an immutable (functional) set implementation based on an AVL -tree. -Adding or removing elements is done through a Factory object and results in the -creation of a new ImmutableSet object. -If an ImmutableSet already exists with the given contents, then the existing one -is returned; equality is compared with a FoldingSetNodeID. -The time and space complexity of add or remove operations is logarithmic in the -size of the original set. - -

-There is no method for returning an element of the set, you can only check for -membership. - -

- - - -

- Other Set-Like Container Options -

- -
- -

-The STL provides several other options, such as std::multiset and the various -"hash_set" like containers (whether from C++ TR1 or from the SGI library). We -never use hash_set and unordered_set because they are generally very expensive -(each insertion requires a malloc) and very non-portable. -

- -

std::multiset is useful if you're not interested in elimination of -duplicates, but has all the drawbacks of std::set. A sorted vector (where you -don't delete duplicate entries) or some other approach is almost always -better.

- -
- -
- - -

- Map-Like Containers (std::map, DenseMap, etc) -

- -
-Map-like containers are useful when you want to associate data to a key. As -usual, there are a lot of different ways to do this. :) - - -

- A sorted 'vector' -

- -
- -

-If your usage pattern follows a strict insert-then-query approach, you can -trivially use the same approach as sorted vectors -for set-like containers. The only difference is that your query function -(which uses std::lower_bound to get efficient log(n) lookup) should only compare -the key, not both the key and value. This yields the same advantages as sorted -vectors for sets. -

-
- - -

- "llvm/ADT/StringMap.h" -

- -
- -

-Strings are commonly used as keys in maps, and they are difficult to support -efficiently: they are variable length, inefficient to hash and compare when -long, expensive to copy, etc. StringMap is a specialized container designed to -cope with these issues. It supports mapping an arbitrary range of bytes to an -arbitrary other object.

- -

The StringMap implementation uses a quadratically-probed hash table, where -the buckets store a pointer to the heap allocated entries (and some other -stuff). The entries in the map must be heap allocated because the strings are -variable length. The string data (key) and the element object (value) are -stored in the same allocation with the string data immediately after the element -object. This container guarantees the "(char*)(&Value+1)" points -to the key string for a value.

- -

The StringMap is very fast for several reasons: quadratic probing is very -cache efficient for lookups, the hash value of strings in buckets is not -recomputed when looking up an element, StringMap rarely has to touch the -memory for unrelated objects when looking up a value (even when hash collisions -happen), hash table growth does not recompute the hash values for strings -already in the table, and each pair in the map is stored in a single allocation -(the string data is stored in the same allocation as the Value of a pair).

- -

StringMap also provides query methods that take byte ranges, so it only ever -copies a string if a value is inserted into the table.

- -

StringMap iteration order, however, is not guaranteed to be deterministic, -so any uses which require that should instead use a std::map.

-
- - -

- "llvm/ADT/IndexedMap.h" -

- -
-

-IndexedMap is a specialized container for mapping small dense integers (or -values that can be mapped to small dense integers) to some other type. It is -internally implemented as a vector with a mapping function that maps the keys to -the dense integer range. -

- -

-This is useful for cases like virtual registers in the LLVM code generator: they -have a dense mapping that is offset by a compile-time constant (the first -virtual register ID).

- -
- - -

- "llvm/ADT/DenseMap.h" -

- -
- -

-DenseMap is a simple quadratically probed hash table. It excels at supporting -small keys and values: it uses a single allocation to hold all of the pairs that -are currently inserted in the map. DenseMap is a great way to map pointers to -pointers, or map other small types to each other. -

- -

-There are several aspects of DenseMap that you should be aware of, however. The -iterators in a DenseMap are invalidated whenever an insertion occurs, unlike -map. Also, because DenseMap allocates space for a large number of key/value -pairs (it starts with 64 by default), it will waste a lot of space if your keys -or values are large. Finally, you must implement a partial specialization of -DenseMapInfo for the key that you want, if it isn't already supported. This -is required to tell DenseMap about two special marker values (which can never be -inserted into the map) that it needs internally.

- -

-DenseMap's find_as() method supports lookup operations using an alternate key -type. This is useful in cases where the normal key type is expensive to -construct, but cheap to compare against. The DenseMapInfo is responsible for -defining the appropriate comparison and hashing methods for each alternate -key type used. -

- -
- - -

- "llvm/ADT/ValueMap.h" -

- -
- -

-ValueMap is a wrapper around a DenseMap mapping -Value*s (or subclasses) to another type. When a Value is deleted or RAUW'ed, -ValueMap will update itself so the new version of the key is mapped to the same -value, just as if the key were a WeakVH. You can configure exactly how this -happens, and what else happens on these two events, by passing -a Config parameter to the ValueMap template.

- -
- - -

- "llvm/ADT/IntervalMap.h" -

- -
- -

IntervalMap is a compact map for small keys and values. It maps key -intervals instead of single keys, and it will automatically coalesce adjacent -intervals. When the map only contains a few intervals, they are stored in the -map object itself to avoid allocations.

- -

The IntervalMap iterators are quite big, so they should not be passed around -as STL iterators. The heavyweight iterators allow a smaller data structure.

- -
- - -

- <map> -

- -
- -

-std::map has similar characteristics to std::set: it uses -a single allocation per pair inserted into the map, it offers log(n) lookup with -an extremely large constant factor, imposes a space penalty of 3 pointers per -pair in the map, etc.

- -

std::map is most useful when your keys or values are very large, if you need -to iterate over the collection in sorted order, or if you need stable iterators -into the map (i.e. they don't get invalidated if an insertion or deletion of -another element takes place).

- -
- - - -

- "llvm/ADT/MapVector.h" -

-
- -

MapVector<KeyT,ValueT> provides a subset of the DenseMap interface. - The main difference is that the iteration order is guaranteed to be - the insertion order, making it an easy (but somewhat expensive) solution - for non-deterministic iteration over maps of pointers.

- -

It is implemented by mapping from key to an index in a vector of key,value - pairs. This provides fast lookup and iteration, but has two main drawbacks: - The key is stored twice and it doesn't support removing elements.

- -
- - -

- "llvm/ADT/IntEqClasses.h" -

- -
- -

IntEqClasses provides a compact representation of equivalence classes of -small integers. Initially, each integer in the range 0..n-1 has its own -equivalence class. Classes can be joined by passing two class representatives to -the join(a, b) method. Two integers are in the same class when findLeader() -returns the same representative.

- -

Once all equivalence classes are formed, the map can be compressed so each -integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m -is the total number of equivalence classes. The map must be uncompressed before -it can be edited again.

- -
- - -

- "llvm/ADT/ImmutableMap.h" -

- -
- -

-ImmutableMap is an immutable (functional) map implementation based on an AVL -tree. -Adding or removing elements is done through a Factory object and results in the -creation of a new ImmutableMap object. -If an ImmutableMap already exists with the given key set, then the existing one -is returned; equality is compared with a FoldingSetNodeID. -The time and space complexity of add or remove operations is logarithmic in the -size of the original map. - -

- - -

- Other Map-Like Container Options -

- -
- -

-The STL provides several other options, such as std::multimap and the various -"hash_map" like containers (whether from C++ TR1 or from the SGI library). We -never use hash_map and unordered_map because they are generally very expensive -(each insertion requires a malloc) and very non-portable.

- -

std::multimap is useful if you want to map a key to multiple values, but has -all the drawbacks of std::map. A sorted vector or some other approach is almost -always better.

- -
- -
- - -

- Bit storage containers (BitVector, SparseBitVector) -

- -
-

Unlike the other containers, there are only two bit storage containers, and -choosing when to use each is relatively straightforward.

- -

One additional option is -std::vector<bool>: we discourage its use for two reasons 1) the -implementation in many common compilers (e.g. commonly available versions of -GCC) is extremely inefficient and 2) the C++ standards committee is likely to -deprecate this container and/or change it significantly somehow. In any case, -please don't use it.

- - -

- BitVector -

- -
-

The BitVector container provides a dynamic size set of bits for manipulation. -It supports individual bit setting/testing, as well as set operations. The set -operations take time O(size of bitvector), but operations are performed one word -at a time, instead of one bit at a time. This makes the BitVector very fast for -set operations compared to other containers. Use the BitVector when you expect -the number of set bits to be high (IE a dense set). -

-
- - -

- SmallBitVector -

- -
-

The SmallBitVector container provides the same interface as BitVector, but -it is optimized for the case where only a small number of bits, less than -25 or so, are needed. It also transparently supports larger bit counts, but -slightly less efficiently than a plain BitVector, so SmallBitVector should -only be used when larger counts are rare. -

- -

-At this time, SmallBitVector does not support set operations (and, or, xor), -and its operator[] does not provide an assignable lvalue. -

-
- - -

- SparseBitVector -

- -
-

The SparseBitVector container is much like BitVector, with one major -difference: Only the bits that are set, are stored. This makes the -SparseBitVector much more space efficient than BitVector when the set is sparse, -as well as making set operations O(number of set bits) instead of O(size of -universe). The downside to the SparseBitVector is that setting and testing of random bits is O(N), and on large SparseBitVectors, this can be slower than BitVector. In our implementation, setting or testing bits in sorted order -(either forwards or reverse) is O(1) worst case. Testing and setting bits within 128 bits (depends on size) of the current bit is also O(1). As a general statement, testing/setting bits in a SparseBitVector is O(distance away from last set bit). -

-
- -
- -
- - -

- Helpful Hints for Common Operations -

- - -
- -

This section describes how to perform some very simple transformations of -LLVM code. This is meant to give examples of common idioms used, showing the -practical side of LLVM transformations.

Because this is a "how-to" section, -you should also read about the main classes that you will be working with. The -Core LLVM Class Hierarchy Reference contains details -and descriptions of the main classes that you should know about.

- - - -

- Basic Inspection and Traversal Routines -

- -
- -

The LLVM compiler infrastructure has many different data structures that may -be traversed. Following the example of the C++ standard template library, the -techniques used to traverse these various data structures are all basically the -same. For an enumerable sequence of values, the XXXbegin() function (or -method) returns an iterator to the start of the sequence, the XXXend() -function returns an iterator pointing to one past the last valid element of the -sequence, and there is some XXXiterator data type that is common -between the two operations.

- -

Because the pattern for iteration is common across many different aspects of -the program representation, the standard template library algorithms may be used -on them, and it is easier to remember how to iterate. First we show a few common -examples of the data structures that need to be traversed. Other data -structures are traversed in very similar ways.

- - -

- Iterating over the BasicBlocks in a Function -

- -
- -

It's quite common to have a Function instance that you'd like to -transform in some way; in particular, you'd like to manipulate its -BasicBlocks. To facilitate this, you'll need to iterate over all of -the BasicBlocks that constitute the Function. The following is -an example that prints the name of a BasicBlock and the number of -Instructions it contains:

- -
-
-// func is a pointer to a Function instance
-for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
-  // Print out the name of the basic block if it has one, and then the
-  // number of instructions that it contains
-  errs() << "Basic block (name=" << i->getName() << ") has "
-             << i->size() << " instructions.\n";
-
-
- -

Note that i can be used as if it were a pointer for the purposes of -invoking member functions of the BasicBlock class. This is -because the indirection operator is overloaded for the iterator -classes. In the above code, the expression i->size() is -exactly equivalent to (*i).size() just like you'd expect.

- -
- - -

- Iterating over the Instructions in a BasicBlock -

- -
- -

Just like when dealing with BasicBlocks in Functions, it's -easy to iterate over the individual instructions that make up -BasicBlocks. Here's a code snippet that prints out each instruction in -a BasicBlock:

- -
-
-// blk is a pointer to a BasicBlock instance
-for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
-   // The next statement works since operator<<(ostream&,...)
-   // is overloaded for Instruction&
-   errs() << *i << "\n";
-
-
- -

However, this isn't really the best way to print out the contents of a -BasicBlock! Since the ostream operators are overloaded for virtually -anything you'll care about, you could have just invoked the print routine on the -basic block itself: errs() << *blk << "\n";.

- -
- - -

- Iterating over the Instructions in a Function -

- -
- -

If you're finding that you commonly iterate over a Function's -BasicBlocks and then that BasicBlock's Instructions, -InstIterator should be used instead. You'll need to include llvm/Support/InstIterator.h, -and then instantiate InstIterators explicitly in your code. Here's a -small example that shows how to dump all instructions in a function to the standard error stream:

- -

-
-#include "llvm/Support/InstIterator.h"
-
-// F is a pointer to a Function instance
-for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-  errs() << *I << "\n";
-
-
- -

Easy, isn't it? You can also use InstIterators to fill a -work list with its initial contents. For example, if you wanted to -initialize a work list to contain all instructions in a Function -F, all you would need to do is something like:

- -
-
-std::set<Instruction*> worklist;
-// or better yet, SmallPtrSet<Instruction*, 64> worklist;
-
-for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-   worklist.insert(&*I);
-
-
- -

The STL set worklist would now contain all instructions in the -Function pointed to by F.

- -
- - -

- Turning an iterator into a class pointer (and - vice-versa) -

- -
- -

Sometimes, it'll be useful to grab a reference (or pointer) to a class -instance when all you've got at hand is an iterator. Well, extracting -a reference or a pointer from an iterator is very straight-forward. -Assuming that i is a BasicBlock::iterator and j -is a BasicBlock::const_iterator:

- -
-
-Instruction& inst = *i;   // Grab reference to instruction reference
-Instruction* pinst = &*i; // Grab pointer to instruction reference
-const Instruction& inst = *j;
-
-
- -

However, the iterators you'll be working with in the LLVM framework are -special: they will automatically convert to a ptr-to-instance type whenever they -need to. Instead of dereferencing the iterator and then taking the address of -the result, you can simply assign the iterator to the proper pointer type and -you get the dereference and address-of operation as a result of the assignment -(behind the scenes, this is a result of overloading casting mechanisms). Thus -the last line of the last example,

- -
-
-Instruction *pinst = &*i;
-
-
- -

is semantically equivalent to

- -
-
-Instruction *pinst = i;
-
-
- -

It's also possible to turn a class pointer into the corresponding iterator, -and this is a constant time operation (very efficient). The following code -snippet illustrates use of the conversion constructors provided by LLVM -iterators. By using these, you can explicitly grab the iterator of something -without actually obtaining it via iteration over some structure:

- -
-
-void printNextInstruction(Instruction* inst) {
-  BasicBlock::iterator it(inst);
-  ++it; // After this line, it refers to the instruction after *inst
-  if (it != inst->getParent()->end()) errs() << *it << "\n";
-}
-
-
- -

Unfortunately, these implicit conversions come at a cost; they prevent -these iterators from conforming to standard iterator conventions, and thus -from being usable with standard algorithms and containers. For example, they -prevent the following code, where B is a BasicBlock, -from compiling:

- -
-
-  llvm::SmallVector<llvm::Instruction *, 16>(B->begin(), B->end());
-
-
- -

Because of this, these implicit conversions may be removed some day, -and operator* changed to return a pointer instead of a reference.

- -
- - -

- Finding call sites: a slightly more complex - example -

- -
- -

Say that you're writing a FunctionPass and would like to count all the -locations in the entire module (that is, across every Function) where a -certain function (i.e., some Function*) is already in scope. As you'll -learn later, you may want to use an InstVisitor to accomplish this in a -much more straight-forward manner, but this example will allow us to explore how -you'd do it if you didn't have InstVisitor around. In pseudo-code, this -is what we want to do:

- -
-
-initialize callCounter to zero
-for each Function f in the Module
-  for each BasicBlock b in f
-    for each Instruction i in b
-      if (i is a CallInst and calls the given function)
-        increment callCounter
-
-
- -

And the actual code is (remember, because we're writing a -FunctionPass, our FunctionPass-derived class simply has to -override the runOnFunction method):

- -
-
-Function* targetFunc = ...;
-
-class OurFunctionPass : public FunctionPass {
-  public:
-    OurFunctionPass(): callCounter(0) { }
-
-    virtual runOnFunction(Function& F) {
-      for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
-        for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) {
-          if (CallInst* callInst = dyn_cast<CallInst>(&*i)) {
-            // We know we've encountered a call instruction, so we
-            // need to determine if it's a call to the
-            // function pointed to by targetFunc or not.
-            if (callInst->getCalledFunction() == targetFunc)
-              ++callCounter;
-          }
-        }
-      }
-    }
-
-  private:
-    unsigned callCounter;
-};
-
-
- -
- - -

- Treating calls and invokes the same way -

- -
- -

You may have noticed that the previous example was a bit oversimplified in -that it did not deal with call sites generated by 'invoke' instructions. In -this, and in other situations, you may find that you want to treat -CallInsts and InvokeInsts the same way, even though their -most-specific common base class is Instruction, which includes lots of -less closely-related things. For these cases, LLVM provides a handy wrapper -class called CallSite. -It is essentially a wrapper around an Instruction pointer, with some -methods that provide functionality common to CallInsts and -InvokeInsts.

- -

This class has "value semantics": it should be passed by value, not by -reference and it should not be dynamically allocated or deallocated using -operator new or operator delete. It is efficiently copyable, -assignable and constructable, with cost equivalent to that of a bare pointer. -If you look at its definition, it has only a single pointer member.

- -
- - -

- Iterating over def-use & use-def chains -

- -
- -

Frequently, we might have an instance of the Value Class and we want to -determine which Users use the Value. The list of all -Users of a particular Value is called a def-use chain. -For example, let's say we have a Function* named F to a -particular function foo. Finding all of the instructions that -use foo is as simple as iterating over the def-use chain -of F:

- -
-
-Function *F = ...;
-
-for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i)
-  if (Instruction *Inst = dyn_cast<Instruction>(*i)) {
-    errs() << "F is used in instruction:\n";
-    errs() << *Inst << "\n";
-  }
-
-
- -

Note that dereferencing a Value::use_iterator is not a very cheap -operation. Instead of performing *i above several times, consider -doing it only once in the loop body and reusing its result.

- -

Alternatively, it's common to have an instance of the User Class and need to know what -Values are used by it. The list of all Values used by a -User is known as a use-def chain. Instances of class -Instruction are common Users, so we might want to iterate over -all of the values that a particular instruction uses (that is, the operands of -the particular Instruction):

- -
-
-Instruction *pi = ...;
-
-for (User::op_iterator i = pi->op_begin(), e = pi->op_end(); i != e; ++i) {
-  Value *v = *i;
-  // ...
-}
-
-
- -

Declaring objects as const is an important tool of enforcing -mutation free algorithms (such as analyses, etc.). For this purpose above -iterators come in constant flavors as Value::const_use_iterator -and User::const_op_iterator. They automatically arise when -calling use/op_begin() on const Value*s or -const User*s respectively. Upon dereferencing, they return -const Use*s. Otherwise the above patterns remain unchanged.

- -
- - -

- Iterating over predecessors & -successors of blocks -

- -
- -

Iterating over the predecessors and successors of a block is quite easy -with the routines defined in "llvm/Support/CFG.h". Just use code like -this to iterate over all predecessors of BB:

- -
-
-#include "llvm/Support/CFG.h"
-BasicBlock *BB = ...;
-
-for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-  BasicBlock *Pred = *PI;
-  // ...
-}
-
-
- -

Similarly, to iterate over successors use -succ_iterator/succ_begin/succ_end.

- -
- -
- - -

- Making simple changes -

- -
- -

There are some primitive transformation operations present in the LLVM -infrastructure that are worth knowing about. When performing -transformations, it's fairly common to manipulate the contents of basic -blocks. This section describes some of the common methods for doing so -and gives example code.

- - -

- Creating and inserting new - Instructions -

- -
- -

Instantiating Instructions

- -

Creation of Instructions is straight-forward: simply call the -constructor for the kind of instruction to instantiate and provide the necessary -parameters. For example, an AllocaInst only requires a -(const-ptr-to) Type. Thus:

- -
-
-AllocaInst* ai = new AllocaInst(Type::Int32Ty);
-
-
- -

will create an AllocaInst instance that represents the allocation of -one integer in the current stack frame, at run time. Each Instruction -subclass is likely to have varying default parameters which change the semantics -of the instruction, so refer to the doxygen documentation for the subclass of -Instruction that you're interested in instantiating.

- -

Naming values

- -

It is very useful to name the values of instructions when you're able to, as -this facilitates the debugging of your transformations. If you end up looking -at generated LLVM machine code, you definitely want to have logical names -associated with the results of instructions! By supplying a value for the -Name (default) parameter of the Instruction constructor, you -associate a logical name with the result of the instruction's execution at -run time. For example, say that I'm writing a transformation that dynamically -allocates space for an integer on the stack, and that integer is going to be -used as some kind of index by some other code. To accomplish this, I place an -AllocaInst at the first point in the first BasicBlock of some -Function, and I'm intending to use it within the same -Function. I might do:

- -
-
-AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
-
-
- -

where indexLoc is now the logical name of the instruction's -execution value, which is a pointer to an integer on the run time stack.

- -

Inserting instructions

- -

There are essentially two ways to insert an Instruction -into an existing sequence of instructions that form a BasicBlock:

- -
    -
  • Insertion into an explicit instruction list - -

    Given a BasicBlock* pb, an Instruction* pi within that - BasicBlock, and a newly-created instruction we wish to insert - before *pi, we do the following:

    - -
    -
    -BasicBlock *pb = ...;
    -Instruction *pi = ...;
    -Instruction *newInst = new Instruction(...);
    -
    -pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
    -
    -
    - -

    Appending to the end of a BasicBlock is so common that - the Instruction class and Instruction-derived - classes provide constructors which take a pointer to a - BasicBlock to be appended to. For example code that - looked like:

    - -
    -
    -BasicBlock *pb = ...;
    -Instruction *newInst = new Instruction(...);
    -
    -pb->getInstList().push_back(newInst); // Appends newInst to pb
    -
    -
    - -

    becomes:

    - -
    -
    -BasicBlock *pb = ...;
    -Instruction *newInst = new Instruction(..., pb);
    -
    -
    - -

    which is much cleaner, especially if you are creating - long instruction streams.

  • - -
  • Insertion into an implicit instruction list - -

    Instruction instances that are already in BasicBlocks - are implicitly associated with an existing instruction list: the instruction - list of the enclosing basic block. Thus, we could have accomplished the same - thing as the above code without being given a BasicBlock by doing: -

    - -
    -
    -Instruction *pi = ...;
    -Instruction *newInst = new Instruction(...);
    -
    -pi->getParent()->getInstList().insert(pi, newInst);
    -
    -
    - -

    In fact, this sequence of steps occurs so frequently that the - Instruction class and Instruction-derived classes provide - constructors which take (as a default parameter) a pointer to an - Instruction which the newly-created Instruction should - precede. That is, Instruction constructors are capable of - inserting the newly-created instance into the BasicBlock of a - provided instruction, immediately before that instruction. Using an - Instruction constructor with an insertBefore (default) - parameter, the above code becomes:

    - -
    -
    -Instruction* pi = ...;
    -Instruction* newInst = new Instruction(..., pi);
    -
    -
    - -

    which is much cleaner, especially if you're creating a lot of - instructions and adding them to BasicBlocks.

  • -
- -
- - -

- Deleting Instructions -

- -
- -

Deleting an instruction from an existing sequence of instructions that form a -BasicBlock is very straight-forward: just -call the instruction's eraseFromParent() method. For example:

- -
-
-Instruction *I = .. ;
-I->eraseFromParent();
-
-
- -

This unlinks the instruction from its containing basic block and deletes -it. If you'd just like to unlink the instruction from its containing basic -block but not delete it, you can use the removeFromParent() method.

- -
- - -

- Replacing an Instruction with another - Value -

- -
- -
Replacing individual instructions
- -

Including "llvm/Transforms/Utils/BasicBlockUtils.h" -permits use of two very useful replace functions: ReplaceInstWithValue -and ReplaceInstWithInst.

- -
Deleting Instructions
- -
-
    -
  • ReplaceInstWithValue - -

    This function replaces all uses of a given instruction with a value, - and then removes the original instruction. The following example - illustrates the replacement of the result of a particular - AllocaInst that allocates memory for a single integer with a null - pointer to an integer.

    - -
    -
    -AllocaInst* instToReplace = ...;
    -BasicBlock::iterator ii(instToReplace);
    -
    -ReplaceInstWithValue(instToReplace->getParent()->getInstList(), ii,
    -                     Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty)));
    -
  • - -
  • ReplaceInstWithInst - -

    This function replaces a particular instruction with another - instruction, inserting the new instruction into the basic block at the - location where the old instruction was, and replacing any uses of the old - instruction with the new instruction. The following example illustrates - the replacement of one AllocaInst with another.

    - -
    -
    -AllocaInst* instToReplace = ...;
    -BasicBlock::iterator ii(instToReplace);
    -
    -ReplaceInstWithInst(instToReplace->getParent()->getInstList(), ii,
    -                    new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt"));
    -
  • -
- -
- -
Replacing multiple uses of Users and Values
- -

You can use Value::replaceAllUsesWith and -User::replaceUsesOfWith to change more than one use at a time. See the -doxygen documentation for the Value Class -and User Class, respectively, for more -information.

- - - -
- - -

- Deleting GlobalVariables -

- -
- -

Deleting a global variable from a module is just as easy as deleting an -Instruction. First, you must have a pointer to the global variable that you wish - to delete. You use this pointer to erase it from its parent, the module. - For example:

- -
-
-GlobalVariable *GV = .. ;
-
-GV->eraseFromParent();
-
-
- -
- -
- - -

- How to Create Types -

- -
- -

In generating IR, you may need some complex types. If you know these types -statically, you can use TypeBuilder<...>::get(), defined -in llvm/Support/TypeBuilder.h, to retrieve them. TypeBuilder -has two forms depending on whether you're building types for cross-compilation -or native library use. TypeBuilder<T, true> requires -that T be independent of the host environment, meaning that it's built -out of types from -the llvm::types -namespace and pointers, functions, arrays, etc. built of -those. TypeBuilder<T, false> additionally allows native C types -whose size may depend on the host compiler. For example,

- -
-
-FunctionType *ft = TypeBuilder<types::i<8>(types::i<32>*), true>::get();
-
-
- -

is easier to read and write than the equivalent

- -
-
-std::vector<const Type*> params;
-params.push_back(PointerType::getUnqual(Type::Int32Ty));
-FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
-
-
- -

See the class -comment for more details.

- -
- -
- - -

- Threads and LLVM -

- - -
-

-This section describes the interaction of the LLVM APIs with multithreading, -both on the part of client applications, and in the JIT, in the hosted -application. -

- -

-Note that LLVM's support for multithreading is still relatively young. Up -through version 2.5, the execution of threaded hosted applications was -supported, but not threaded client access to the APIs. While this use case is -now supported, clients must adhere to the guidelines specified below to -ensure proper operation in multithreaded mode. -

- -

-Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic -intrinsics in order to support threaded operation. If you need a -multithreading-capable LLVM on a platform without a suitably modern system -compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and -using the resultant compiler to build a copy of LLVM with multithreading -support. -

- - -

- Entering and Exiting Multithreaded Mode -

- -
- -

-In order to properly protect its internal data structures while avoiding -excessive locking overhead in the single-threaded case, LLVM must initialize -certain data structures necessary to provide guards around its internals. To do -so, the client program must invoke llvm_start_multithreaded() before -making any concurrent LLVM API calls. To subsequently tear down these -structures, use the llvm_stop_multithreaded() call. You can also use -the llvm_is_multithreaded() call to check the status of multithreaded -mode. -

- -

-Note that both of these calls must be made in isolation. That is to -say that no other LLVM API calls may be executing at any time during the -execution of llvm_start_multithreaded() or llvm_stop_multithreaded() -. It is the client's responsibility to enforce this isolation. -

- -

-The return value of llvm_start_multithreaded() indicates the success or -failure of the initialization. Failure typically indicates that your copy of -LLVM was built without multithreading support, typically because GCC atomic -intrinsics were not found in your system compiler. In this case, the LLVM API -will not be safe for concurrent calls. However, it will be safe for -hosting threaded applications in the JIT, though care -must be taken to ensure that side exits and the like do not accidentally -result in concurrent LLVM API calls. -

-
- - -

- Ending Execution with llvm_shutdown() -

- -
-

-When you are done using the LLVM APIs, you should call llvm_shutdown() -to deallocate memory used for internal structures. This will also invoke -llvm_stop_multithreaded() if LLVM is operating in multithreaded mode. -As such, llvm_shutdown() requires the same isolation guarantees as -llvm_stop_multithreaded(). -

- -

-Note that, if you use scope-based shutdown, you can use the -llvm_shutdown_obj class, which calls llvm_shutdown() in its -destructor. -

- - -

- Lazy Initialization with ManagedStatic -

- -
-

-ManagedStatic is a utility class in LLVM used to implement static -initialization of static resources, such as the global type tables. Before the -invocation of llvm_shutdown(), it implements a simple lazy -initialization scheme. Once llvm_start_multithreaded() returns, -however, it uses double-checked locking to implement thread-safe lazy -initialization. -

- -

-Note that, because no other threads are allowed to issue LLVM API calls before -llvm_start_multithreaded() returns, it is possible to have -ManagedStatics of llvm::sys::Mutexs. -

- -

-The llvm_acquire_global_lock() and llvm_release_global_lock -APIs provide access to the global lock used to implement the double-checked -locking for lazy initialization. These should only be used internally to LLVM, -and only if you know what you're doing! -

-
- - -

- Achieving Isolation with LLVMContext -

- -
-

-LLVMContext is an opaque class in the LLVM API which clients can use -to operate multiple, isolated instances of LLVM concurrently within the same -address space. For instance, in a hypothetical compile-server, the compilation -of an individual translation unit is conceptually independent from all the -others, and it would be desirable to be able to compile incoming translation -units concurrently on independent server threads. Fortunately, -LLVMContext exists to enable just this kind of scenario! -

- -

-Conceptually, LLVMContext provides isolation. Every LLVM entity -(Modules, Values, Types, Constants, etc.) -in LLVM's in-memory IR belongs to an LLVMContext. Entities in -different contexts cannot interact with each other: Modules in -different contexts cannot be linked together, Functions cannot be added -to Modules in different contexts, etc. What this means is that it is -safe to compile on multiple threads simultaneously, as long as no two threads -operate on entities within the same context. -

- -

-In practice, very few places in the API require the explicit specification of a -LLVMContext, other than the Type creation/lookup APIs. -Because every Type carries a reference to its owning context, most -other entities can determine what context they belong to by looking at their -own Type. If you are adding new entities to LLVM IR, please try to -maintain this interface design. -

- -

-For clients that do not require the benefits of isolation, LLVM -provides a convenience API getGlobalContext(). This returns a global, -lazily initialized LLVMContext that may be used in situations where -isolation is not a concern. -

-
- - -

- Threads and the JIT -

- -
-

-LLVM's "eager" JIT compiler is safe to use in threaded programs. Multiple -threads can call ExecutionEngine::getPointerToFunction() or -ExecutionEngine::runFunction() concurrently, and multiple threads can -run code output by the JIT concurrently. The user must still ensure that only -one thread accesses IR in a given LLVMContext while another thread -might be modifying it. One way to do that is to always hold the JIT lock while -accessing IR outside the JIT (the JIT modifies the IR by adding -CallbackVHs). Another way is to only -call getPointerToFunction() from the LLVMContext's thread. -

- -

When the JIT is configured to compile lazily (using -ExecutionEngine::DisableLazyCompilation(false)), there is currently a -race condition in -updating call sites after a function is lazily-jitted. It's still possible to -use the lazy JIT in a threaded program if you ensure that only one thread at a -time can call any particular lazy stub and that the JIT lock guards any IR -access, but we suggest using only the eager JIT in threaded programs. -

-
- -
- - -

- Advanced Topics -

- - -
-

-This section describes some of the advanced or obscure APIs that most clients -do not need to be aware of. These APIs tend to manage the inner workings of the -LLVM system, and only need to be accessed in unusual circumstances. -

- - - -

- The ValueSymbolTable class -

- -
-

The -ValueSymbolTable class provides a symbol table that the Function and -Module classes use for naming value definitions. The symbol table -can provide a name for any Value. -

- -

Note that the SymbolTable class should not be directly accessed -by most clients. It should only be used when iteration over the symbol table -names themselves is required, which is very special purpose. Note that not -all LLVM -Values have names, and those without names (i.e. they have -an empty name) do not exist in the symbol table. -

- -

Symbol tables support iteration over the values in the symbol -table with begin/end/iterator and support querying to see if a -specific name is in the symbol table (with lookup). The -ValueSymbolTable class exposes no public mutator methods; instead, -simply call setName on a value, which will autoinsert it into the -appropriate symbol table.

- -
- - - - -

- The User and owned Use classes' memory layout -

- -
-

The -User class provides a basis for expressing the ownership of User -towards other -Values. The -Use helper class is employed to do the bookkeeping and to facilitate O(1) -addition and removal.

- - -

- - Interaction and relationship between User and Use objects - -

- -
-

-A subclass of User can choose between incorporating its Use objects -or referring to them out-of-line by means of a pointer. A mixed variant -(some Uses inline others hung off) is impractical and breaks the invariant -that the Use objects belonging to the same User form a contiguous array. -

- -

-We have 2 different layouts in the User (sub)classes: -

    -
  • Layout a) -The Use object(s) are inside (resp. at fixed offset) of the User -object and there are a fixed number of them.

    - -
  • Layout b) -The Use object(s) are referenced by a pointer to an -array from the User object and there may be a variable -number of them.

    -
-

-As of v2.4 each layout still possesses a direct pointer to the -start of the array of Uses. Though not mandatory for layout a), -we stick to this redundancy for the sake of simplicity. -The User object also stores the number of Use objects it -has. (Theoretically this information can also be calculated -given the scheme presented below.)

-

-Special forms of allocation operators (operator new) -enforce the following memory layouts:

- -
    -
  • Layout a) is modelled by prepending the User object by the Use[] array.

    - -
    -...---.---.---.---.-------...
    -  | P | P | P | P | User
    -'''---'---'---'---'-------'''
    -
    - -
  • Layout b) is modelled by pointing at the Use[] array.

    -
    -.-------...
    -| User
    -'-------'''
    -    |
    -    v
    -    .---.---.---.---...
    -    | P | P | P | P |
    -    '---'---'---'---'''
    -
    -
-(In the above figures 'P' stands for the Use** that - is stored in each Use object in the member Use::Prev) - -
- - -

- The waymarking algorithm -

- -
-

-Since the Use objects are deprived of the direct (back)pointer to -their User objects, there must be a fast and exact method to -recover it. This is accomplished by the following scheme:

- -A bit-encoding in the 2 LSBits (least significant bits) of the Use::Prev allows to find the -start of the User object: -
    -
  • 00 —> binary digit 0
  • -
  • 01 —> binary digit 1
  • -
  • 10 —> stop and calculate (s)
  • -
  • 11 —> full stop (S)
  • -
-

-Given a Use*, all we have to do is to walk till we get -a stop and we either have a User immediately behind or -we have to walk to the next stop picking up digits -and calculating the offset:

-
-.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
-| 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
-'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
-    |+15                |+10            |+6         |+3     |+1
-    |                   |               |           |       |__>
-    |                   |               |           |__________>
-    |                   |               |______________________>
-    |                   |______________________________________>
-    |__________________________________________________________>
-
-

-Only the significant number of bits need to be stored between the -stops, so that the worst case is 20 memory accesses when there are -1000 Use objects associated with a User.

- -
- - -

- Reference implementation -

- -
-

-The following literate Haskell fragment demonstrates the concept:

- -
-
-> import Test.QuickCheck
-> 
-> digits :: Int -> [Char] -> [Char]
-> digits 0 acc = '0' : acc
-> digits 1 acc = '1' : acc
-> digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
-> 
-> dist :: Int -> [Char] -> [Char]
-> dist 0 [] = ['S']
-> dist 0 acc = acc
-> dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
-> dist n acc = dist (n - 1) $ dist 1 acc
-> 
-> takeLast n ss = reverse $ take n $ reverse ss
-> 
-> test = takeLast 40 $ dist 20 []
-> 
-
-
-

-Printing <test> gives: "1s100000s11010s10100s1111s1010s110s11s1S"

-

-The reverse algorithm computes the length of the string just by examining -a certain prefix:

- -
-
-> pref :: [Char] -> Int
-> pref "S" = 1
-> pref ('s':'1':rest) = decode 2 1 rest
-> pref (_:rest) = 1 + pref rest
-> 
-> decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
-> decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
-> decode walk acc _ = walk + acc
-> 
-
-
-

-Now, as expected, printing <pref test> gives 40.

-

-We can quickCheck this with following property:

- -
-
-> testcase = dist 2000 []
-> testcaseLength = length testcase
-> 
-> identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr
->     where arr = takeLast n testcase
-> 
-
-
-

-As expected <quickCheck identityProp> gives:

- -
-*Main> quickCheck identityProp
-OK, passed 100 tests.
-
-

-Let's be a bit more exhaustive:

- -
-
-> 
-> deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
-> 
-
-
-

-And here is the result of <deepCheck identityProp>:

- -
-*Main> deepCheck identityProp
-OK, passed 500 tests.
-
- -
- - -

- Tagging considerations -

- -
- -

-To maintain the invariant that the 2 LSBits of each Use** in Use -never change after being set up, setters of Use::Prev must re-tag the -new Use** on every modification. Accordingly getters must strip the -tag bits.

-

-For layout b) instead of the User we find a pointer (User* with LSBit set). -Following this pointer brings us to the User. A portable trick ensures -that the first bytes of User (if interpreted as a pointer) never have -the LSBit set. (Portability is relying on the fact that all known compilers place the -vptr in the first word of the instances.)

- -
- -
- -
- - -

- The Core LLVM Class Hierarchy Reference -

- - -
-

#include "llvm/Type.h" -
doxygen info: Type Class

- -

The Core LLVM classes are the primary means of representing the program -being inspected or transformed. The core LLVM classes are defined in -header files in the include/llvm/ directory, and implemented in -the lib/VMCore directory.

- - -

- The Type class and Derived Types -

- -
- -

Type is a superclass of all type classes. Every Value has - a Type. Type cannot be instantiated directly but only - through its subclasses. Certain primitive types (VoidType, - LabelType, FloatType and DoubleType) have hidden - subclasses. They are hidden because they offer no useful functionality beyond - what the Type class offers except to distinguish themselves from - other subclasses of Type.

-

All other types are subclasses of DerivedType. Types can be - named, but this is not a requirement. There exists exactly - one instance of a given shape at any one time. This allows type equality to - be performed with address equality of the Type Instance. That is, given two - Type* values, the types are identical if the pointers are identical. -

- - -

- Important Public Methods -

- -
- -
    -
  • bool isIntegerTy() const: Returns true for any integer type.
  • - -
  • bool isFloatingPointTy(): Return true if this is one of the five - floating point types.
  • - -
  • bool isSized(): Return true if the type has known size. Things - that don't have a size are abstract types, labels and void.
  • - -
-
- - -

- Important Derived Types -

-
-
-
IntegerType
-
Subclass of DerivedType that represents integer types of any bit width. - Any bit width between IntegerType::MIN_INT_BITS (1) and - IntegerType::MAX_INT_BITS (~8 million) can be represented. -
    -
  • static const IntegerType* get(unsigned NumBits): get an integer - type of a specific bit width.
  • -
  • unsigned getBitWidth() const: Get the bit width of an integer - type.
  • -
-
-
SequentialType
-
This is subclassed by ArrayType, PointerType and VectorType. -
    -
  • const Type * getElementType() const: Returns the type of each - of the elements in the sequential type.
  • -
-
-
ArrayType
-
This is a subclass of SequentialType and defines the interface for array - types. -
    -
  • unsigned getNumElements() const: Returns the number of - elements in the array.
  • -
-
-
PointerType
-
Subclass of SequentialType for pointer types.
-
VectorType
-
Subclass of SequentialType for vector types. A - vector type is similar to an ArrayType but is distinguished because it is - a first class type whereas ArrayType is not. Vector types are used for - vector operations and are usually small vectors of an integer or floating - point type.
-
StructType
-
Subclass of DerivedTypes for struct types.
-
FunctionType
-
Subclass of DerivedTypes for function types. -
    -
  • bool isVarArg() const: Returns true if it's a vararg - function
  • -
  • const Type * getReturnType() const: Returns the - return type of the function.
  • -
  • const Type * getParamType (unsigned i): Returns - the type of the ith parameter.
  • -
  • const unsigned getNumParams() const: Returns the - number of formal parameters.
  • -
-
-
-
- -
- - -

- The Module class -

- -
- -

#include "llvm/Module.h"
doxygen info: -Module Class

- -

The Module class represents the top level structure present in LLVM -programs. An LLVM module is effectively either a translation unit of the -original program or a combination of several translation units merged by the -linker. The Module class keeps track of a list of Functions, a list of GlobalVariables, and a SymbolTable. Additionally, it contains a few -helpful member functions that try to make common operations easy.

- - -

- Important Public Members of the Module class -

- -
- -
    -
  • Module::Module(std::string name = "") - -

    Constructing a Module is easy. You can optionally -provide a name for it (probably based on the name of the translation unit).

    -
  • - -
  • Module::iterator - Typedef for function list iterator
    - Module::const_iterator - Typedef for const_iterator.
    - - begin(), end() - size(), empty() - -

    These are forwarding methods that make it easy to access the contents of - a Module object's Function - list.

  • - -
  • Module::FunctionListType &getFunctionList() - -

    Returns the list of Functions. This is - necessary to use when you need to update the list or perform a complex - action that doesn't have a forwarding method.

    - -

  • -
- -
- -
    -
  • Module::global_iterator - Typedef for global variable list iterator
    - - Module::const_global_iterator - Typedef for const_iterator.
    - - global_begin(), global_end() - global_size(), global_empty() - -

    These are forwarding methods that make it easy to access the contents of - a Module object's GlobalVariable list.

  • - -
  • Module::GlobalListType &getGlobalList() - -

    Returns the list of GlobalVariables. This is necessary to - use when you need to update the list or perform a complex action that - doesn't have a forwarding method.

    - -

  • -
- -
- - - -
- -
    - -
  • Function *getFunction(StringRef Name) const - - -

    Look up the specified function in the Module SymbolTable. If it does not exist, return - null.

  • - -
  • Function *getOrInsertFunction(const - std::string &Name, const FunctionType *T) - -

    Look up the specified function in the Module SymbolTable. If it does not exist, add an - external declaration for the function and return it.

  • - -
  • std::string getTypeName(const Type *Ty) - -

    If there is at least one entry in the SymbolTable for the specified Type, return it. Otherwise return the empty - string.

  • - -
  • bool addTypeName(const std::string &Name, const Type *Ty) - -

    Insert an entry in the SymbolTable - mapping Name to Ty. If there is already an entry for this - name, true is returned and the SymbolTable is not modified.

  • -
- -
- -
- - -

- The Value class -

- -
- -

#include "llvm/Value.h" -
-doxygen info: Value Class

- -

The Value class is the most important class in the LLVM Source -base. It represents a typed value that may be used (among other things) as an -operand to an instruction. There are many different types of Values, -such as Constants, Arguments. Even Instructions and Functions are Values.

- -

A particular Value may be used many times in the LLVM representation -for a program. For example, an incoming argument to a function (represented -with an instance of the Argument class) is "used" by -every instruction in the function that references the argument. To keep track -of this relationship, the Value class keeps a list of all of the Users that are using it (the User class is a base class for all nodes in the LLVM -graph that can refer to Values). This use list is how LLVM represents -def-use information in the program, and is accessible through the use_* -methods, shown below.

- -

Because LLVM is a typed representation, every LLVM Value is typed, -and this Type is available through the getType() -method. In addition, all LLVM values can be named. The "name" of the -Value is a symbolic string printed in the LLVM code:

- -
-
-%foo = add i32 1, 2
-
-
- -

The name of this instruction is "foo". NOTE -that the name of any value may be missing (an empty string), so names should -ONLY be used for debugging (making the source code easier to read, -debugging printouts), they should not be used to keep track of values or map -between them. For this purpose, use a std::map of pointers to the -Value itself instead.

- -

One important aspect of LLVM is that there is no distinction between an SSA -variable and the operation that produces it. Because of this, any reference to -the value produced by an instruction (or the value available as an incoming -argument, for example) is represented as a direct pointer to the instance of -the class that -represents this value. Although this may take some getting used to, it -simplifies the representation and makes it easier to manipulate.

- - -

- Important Public Members of the Value class -

- -
- -
    -
  • Value::use_iterator - Typedef for iterator over the -use-list
    - Value::const_use_iterator - Typedef for const_iterator over -the use-list
    - unsigned use_size() - Returns the number of users of the -value.
    - bool use_empty() - Returns true if there are no users.
    - use_iterator use_begin() - Get an iterator to the start of -the use-list.
    - use_iterator use_end() - Get an iterator to the end of the -use-list.
    - User *use_back() - Returns the last -element in the list. -

    These methods are the interface to access the def-use -information in LLVM. As with all other iterators in LLVM, the naming -conventions follow the conventions defined by the STL.

    -
  • -
  • Type *getType() const -

    This method returns the Type of the Value.

    -
  • -
  • bool hasName() const
    - std::string getName() const
    - void setName(const std::string &Name) -

    This family of methods is used to access and assign a name to a Value, -be aware of the precaution above.

    -
  • -
  • void replaceAllUsesWith(Value *V) - -

    This method traverses the use list of a Value changing all Users of the current value to refer to - "V" instead. For example, if you detect that an instruction always - produces a constant value (for example through constant folding), you can - replace all uses of the instruction with the constant like this:

    - -
    -
    -Inst->replaceAllUsesWith(ConstVal);
    -
    -
    - -
- -
- -
- - -

- The User class -

- -
- -

-#include "llvm/User.h"
-doxygen info: User Class
-Superclass: Value

- -

The User class is the common base class of all LLVM nodes that may -refer to Values. It exposes a list of "Operands" -that are all of the Values that the User is -referring to. The User class itself is a subclass of -Value.

- -

The operands of a User point directly to the LLVM Value that it refers to. Because LLVM uses Static -Single Assignment (SSA) form, there can only be one definition referred to, -allowing this direct connection. This connection provides the use-def -information in LLVM.

- - -

- Important Public Members of the User class -

- -
- -

The User class exposes the operand list in two ways: through -an index access interface and through an iterator based interface.

- -
    -
  • Value *getOperand(unsigned i)
    - unsigned getNumOperands() -

    These two methods expose the operands of the User in a -convenient form for direct access.

  • - -
  • User::op_iterator - Typedef for iterator over the operand -list
    - op_iterator op_begin() - Get an iterator to the start of -the operand list.
    - op_iterator op_end() - Get an iterator to the end of the -operand list. -

    Together, these methods make up the iterator based interface to -the operands of a User.

  • -
- -
- -
- - -

- The Instruction class -

- -
- -

#include "llvm/Instruction.h"
-doxygen info: Instruction Class
-Superclasses: User, Value

- -

The Instruction class is the common base class for all LLVM -instructions. It provides only a few methods, but is a very commonly used -class. The primary data tracked by the Instruction class itself is the -opcode (instruction type) and the parent BasicBlock the Instruction is embedded -into. To represent a specific type of instruction, one of many subclasses of -Instruction are used.

- -

Because the Instruction class subclasses the User class, its operands can be accessed in the same -way as for other Users (with the -getOperand()/getNumOperands() and -op_begin()/op_end() methods).

An important file for -the Instruction class is the llvm/Instruction.def file. This -file contains some meta-data about the various different types of instructions -in LLVM. It describes the enum values that are used as opcodes (for example -Instruction::Add and Instruction::ICmp), as well as the -concrete sub-classes of Instruction that implement the instruction (for -example BinaryOperator and CmpInst). Unfortunately, the use of macros in -this file confuses doxygen, so these enum values don't show up correctly in the -doxygen output.

- - -

- - Important Subclasses of the Instruction class - -

-
-
    -
  • BinaryOperator -

    This subclass represents all two operand instructions whose operands - must be the same type, except for the comparison instructions.

  • -
  • CastInst -

    This subclass is the parent of the 12 casting instructions. It provides - common operations on cast instructions.

    -
  • CmpInst -

    This subclass represents the two comparison instructions, - ICmpInst (integer operands), and - FCmpInst (floating point operands).

    -
  • TerminatorInst -

    This subclass is the parent of all terminator instructions (those which - can terminate a block).

    -
-
- - -

- - Important Public Members of the Instruction class - -

- -
- -
    -
  • BasicBlock *getParent() -

    Returns the BasicBlock that -this Instruction is embedded into.

  • -
  • bool mayWriteToMemory() -

    Returns true if the instruction writes to memory, i.e. it is a - call, free, invoke, or store.

  • -
  • unsigned getOpcode() -

    Returns the opcode for the Instruction.

  • -
  • Instruction *clone() const -

    Returns another instance of the specified instruction, identical -in all ways to the original except that the instruction has no parent -(i.e. it's not embedded into a BasicBlock), -and it has no name.

  • -
- -
- -
- - -

- The Constant class and subclasses -

- -
- -

Constant represents a base class for different types of constants. It -is subclassed by ConstantInt, ConstantArray, etc. for representing -the various types of Constants. GlobalValue is also -a subclass, which represents the address of a global variable or function. -

- - -

Important Subclasses of Constant

-
-
    -
  • ConstantInt : This subclass of Constant represents an integer constant of - any width. -
      -
    • const APInt& getValue() const: Returns the underlying - value of this constant, an APInt value.
    • -
    • int64_t getSExtValue() const: Converts the underlying APInt - value to an int64_t via sign extension. If the value (not the bit width) - of the APInt is too large to fit in an int64_t, an assertion will result. - For this reason, use of this method is discouraged.
    • -
    • uint64_t getZExtValue() const: Converts the underlying APInt - value to a uint64_t via zero extension. If the value (not the bit width) - of the APInt is too large to fit in a uint64_t, an assertion will result. - For this reason, use of this method is discouraged.
    • -
    • static ConstantInt* get(const APInt& Val): Returns the - ConstantInt object that represents the value provided by Val. - The type is implied as the IntegerType that corresponds to the bit width - of Val.
    • -
    • static ConstantInt* get(const Type *Ty, uint64_t Val): - Returns the ConstantInt object that represents the value provided by - Val for integer type Ty.
    • -
    -
  • -
  • ConstantFP : This class represents a floating point constant. -
      -
    • double getValue() const: Returns the underlying value of - this constant.
    • -
    -
  • -
  • ConstantArray : This represents a constant array. -
      -
    • const std::vector<Use> &getValues() const: Returns - a vector of component constants that make up this array.
    • -
    -
  • -
  • ConstantStruct : This represents a constant struct. -
      -
    • const std::vector<Use> &getValues() const: Returns - a vector of component constants that make up this struct.
    • -
    -
  • -
  • GlobalValue : This represents either a global variable or a function. In - either case, the value is a constant fixed address (after linking). -
  • -
-
- -
- - -

- The GlobalValue class -

- -
- -

#include "llvm/GlobalValue.h"
-doxygen info: GlobalValue -Class
-Superclasses: Constant, -User, Value

- -

Global values (GlobalVariables or Functions) are the only LLVM values that are -visible in the bodies of all Functions. -Because they are visible at global scope, they are also subject to linking with -other globals defined in different translation units. To control the linking -process, GlobalValues know their linkage rules. Specifically, -GlobalValues know whether they have internal or external linkage, as -defined by the LinkageTypes enumeration.

- -

If a GlobalValue has internal linkage (equivalent to being -static in C), it is not visible to code outside the current translation -unit, and does not participate in linking. If it has external linkage, it is -visible to external code, and does participate in linking. In addition to -linkage information, GlobalValues keep track of which Module they are currently part of.

- -

Because GlobalValues are memory objects, they are always referred to -by their address. As such, the Type of a -global is always a pointer to its contents. It is important to remember this -when using the GetElementPtrInst instruction because this pointer must -be dereferenced first. For example, if you have a GlobalVariable (a -subclass of GlobalValue) that is an array of 24 ints, type [24 x -i32], then the GlobalVariable is a pointer to that array. Although -the address of the first element of this array and the value of the -GlobalVariable are the same, they have different types. The -GlobalVariable's type is [24 x i32]. The first element's type -is i32. Because of this, accessing a global value requires you to -dereference the pointer with GetElementPtrInst first, then its elements -can be accessed. This is explained in the LLVM -Language Reference Manual.

- - -

- - Important Public Members of the GlobalValue class - -

- -
- -
    -
  • bool hasInternalLinkage() const
    - bool hasExternalLinkage() const
    - void setInternalLinkage(bool HasInternalLinkage) -

    These methods manipulate the linkage characteristics of the GlobalValue.

    -

    -
  • -
  • Module *getParent() -

    This returns the Module that the -GlobalValue is currently embedded into.

  • -
- -
- -
- - -

- The Function class -

- -
- -

#include "llvm/Function.h"
doxygen -info: Function Class
-Superclasses: GlobalValue, -Constant, -User, -Value

- -

The Function class represents a single procedure in LLVM. It is -actually one of the more complex classes in the LLVM hierarchy because it must -keep track of a large amount of data. The Function class keeps track -of a list of BasicBlocks, a list of formal -Arguments, and a -SymbolTable.

- -

The list of BasicBlocks is the most -commonly used part of Function objects. The list imposes an implicit -ordering of the blocks in the function, which indicate how the code will be -laid out by the backend. Additionally, the first BasicBlock is the implicit entry node for the -Function. It is not legal in LLVM to explicitly branch to this initial -block. There are no implicit exit nodes, and in fact there may be multiple exit -nodes from a single Function. If the BasicBlock list is empty, this indicates that -the Function is actually a function declaration: the actual body of the -function hasn't been linked in yet.

- -

In addition to a list of BasicBlocks, the -Function class also keeps track of the list of formal Arguments that the function receives. This -container manages the lifetime of the Argument -nodes, just like the BasicBlock list does for -the BasicBlocks.

- -

The SymbolTable is a very rarely used -LLVM feature that is only used when you have to look up a value by name. Aside -from that, the SymbolTable is used -internally to make sure that there are not conflicts between the names of Instructions, BasicBlocks, or Arguments in the function body.

- -

Note that Function is a GlobalValue -and therefore also a Constant. The value of the function -is its address (after linking) which is guaranteed to be constant.

- - -

- - Important Public Members of the Function class - -

- -
- -
    -
  • Function(const FunctionType - *Ty, LinkageTypes Linkage, const std::string &N = "", Module* Parent = 0) - -

    Constructor used when you need to create new Functions to add - to the program. The constructor must specify the type of the function to - create and what type of linkage the function should have. The FunctionType argument - specifies the formal arguments and return value for the function. The same - FunctionType value can be used to - create multiple functions. The Parent argument specifies the Module - in which the function is defined. If this argument is provided, the function - will automatically be inserted into that module's list of - functions.

  • - -
  • bool isDeclaration() - -

    Return whether or not the Function has a body defined. If the - function is "external", it does not have a body, and thus must be resolved - by linking with a function defined in a different translation unit.

  • - -
  • Function::iterator - Typedef for basic block list iterator
    - Function::const_iterator - Typedef for const_iterator.
    - - begin(), end() - size(), empty() - -

    These are forwarding methods that make it easy to access the contents of - a Function object's BasicBlock - list.

  • - -
  • Function::BasicBlockListType &getBasicBlockList() - -

    Returns the list of BasicBlocks. This - is necessary to use when you need to update the list or perform a complex - action that doesn't have a forwarding method.

  • - -
  • Function::arg_iterator - Typedef for the argument list -iterator
    - Function::const_arg_iterator - Typedef for const_iterator.
    - - arg_begin(), arg_end() - arg_size(), arg_empty() - -

    These are forwarding methods that make it easy to access the contents of - a Function object's Argument - list.

  • - -
  • Function::ArgumentListType &getArgumentList() - -

    Returns the list of Arguments. This is - necessary to use when you need to update the list or perform a complex - action that doesn't have a forwarding method.

  • - -
  • BasicBlock &getEntryBlock() - -

    Returns the entry BasicBlock for the - function. Because the entry block for the function is always the first - block, this returns the first block of the Function.

  • - -
  • Type *getReturnType()
    - FunctionType *getFunctionType() - -

    This traverses the Type of the - Function and returns the return type of the function, or the FunctionType of the actual - function.

  • - -
  • SymbolTable *getSymbolTable() - -

    Return a pointer to the SymbolTable - for this Function.

  • -
- -
- -
- - -

- The GlobalVariable class -

- -
- -

#include "llvm/GlobalVariable.h" -
-doxygen info: GlobalVariable - Class
-Superclasses: GlobalValue, -Constant, -User, -Value

- -

Global variables are represented with the (surprise surprise) -GlobalVariable class. Like functions, GlobalVariables are also -subclasses of GlobalValue, and as such are -always referenced by their address (global values must live in memory, so their -"name" refers to their constant address). See -GlobalValue for more on this. Global -variables may have an initial value (which must be a -Constant), and if they have an initializer, -they may be marked as "constant" themselves (indicating that their contents -never change at runtime).

- - -

- - Important Public Members of the GlobalVariable class - -

- -
- -
    -
  • GlobalVariable(const Type *Ty, bool - isConstant, LinkageTypes& Linkage, Constant - *Initializer = 0, const std::string &Name = "", Module* Parent = 0) - -

    Create a new global variable of the specified type. If - isConstant is true then the global variable will be marked as - unchanging for the program. The Linkage parameter specifies the type of - linkage (internal, external, weak, linkonce, appending) for the variable. - If the linkage is InternalLinkage, WeakAnyLinkage, WeakODRLinkage, - LinkOnceAnyLinkage or LinkOnceODRLinkage,  then the resultant - global variable will have internal linkage. AppendingLinkage concatenates - together all instances (in different translation units) of the variable - into a single variable but is only applicable to arrays.  See - the LLVM Language Reference for - further details on linkage types. Optionally an initializer, a name, and the - module to put the variable into may be specified for the global variable as - well.

  • - -
  • bool isConstant() const - -

    Returns true if this is a global variable that is known not to - be modified at runtime.

  • - -
  • bool hasInitializer() - -

    Returns true if this GlobalVariable has an initializer.

  • - -
  • Constant *getInitializer() - -

    Returns the initial value for a GlobalVariable. It is not legal - to call this method if there is no initializer.

  • -
- -
- -
- - -

- The BasicBlock class -

- -
- -

#include "llvm/BasicBlock.h"
-doxygen info: BasicBlock -Class
-Superclass: Value

- -

This class represents a single entry single exit section of the code, -commonly known as a basic block by the compiler community. The -BasicBlock class maintains a list of Instructions, which form the body of the block. -Matching the language definition, the last element of this list of instructions -is always a terminator instruction (a subclass of the TerminatorInst class).

- -

In addition to tracking the list of instructions that make up the block, the -BasicBlock class also keeps track of the Function that it is embedded into.

- -

Note that BasicBlocks themselves are Values, because they are referenced by instructions -like branches and can go in the switch tables. BasicBlocks have type -label.

- - -

- - Important Public Members of the BasicBlock class - -

- -
-
    - -
  • BasicBlock(const std::string &Name = "", Function *Parent = 0) - -

    The BasicBlock constructor is used to create new basic blocks for -insertion into a function. The constructor optionally takes a name for the new -block, and a Function to insert it into. If -the Parent parameter is specified, the new BasicBlock is -automatically inserted at the end of the specified Function; if not specified, the BasicBlock must be -manually inserted into the Function.

  • - -
  • BasicBlock::iterator - Typedef for instruction list iterator
    -BasicBlock::const_iterator - Typedef for const_iterator.
    -begin(), end(), front(), back(), -size(), empty() -STL-style functions for accessing the instruction list. - -

    These methods and typedefs are forwarding functions that have the same -semantics as the standard library methods of the same names. These methods -expose the underlying instruction list of a basic block in a way that is easy to -manipulate. To get the full complement of container operations (including -operations to update the list), you must use the getInstList() -method.

  • - -
  • BasicBlock::InstListType &getInstList() - -

    This method is used to get access to the underlying container that actually -holds the Instructions. This method must be used when there isn't a forwarding -function in the BasicBlock class for the operation that you would like -to perform. Because there are no forwarding functions for "updating" -operations, you need to use this if you want to update the contents of a -BasicBlock.

  • - -
  • Function *getParent() - -

    Returns a pointer to the Function the block is -embedded into, or a null pointer if it is homeless.

  • - -
  • TerminatorInst *getTerminator() - -

    Returns a pointer to the terminator instruction that appears at the end of -the BasicBlock. If there is no terminator instruction, or if the last -instruction in the block is not a terminator, then a null pointer is -returned.

  • - -
- -
- -
- - -

- The Argument class -

- -
- -

This subclass of Value defines the interface for incoming formal -arguments to a function. A Function maintains a list of its formal -arguments. An argument has a pointer to the parent Function.

- -
- -
- - -
-
- Valid CSS - Valid HTML 4.01 Strict - - Dinakar Dhurjati and - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-07 02:56:09 +0200 (Sun, 07 Oct 2012) $ -
- - - diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst new file mode 100644 index 000000000000..7864165617a0 --- /dev/null +++ b/docs/ProgrammersManual.rst @@ -0,0 +1,3204 @@ +======================== +LLVM Programmer's Manual +======================== + +.. contents:: + :local: + +.. warning:: + This is always a work in progress. + +.. _introduction: + +Introduction +============ + +This document is meant to highlight some of the important classes and interfaces +available in the LLVM source-base. This manual is not intended to explain what +LLVM is, how it works, and what LLVM code looks like. It assumes that you know +the basics of LLVM and are interested in writing transformations or otherwise +analyzing or manipulating the code. + +This document should get you oriented so that you can find your way in the +continuously growing source code that makes up the LLVM infrastructure. Note +that this manual is not intended to serve as a replacement for reading the +source code, so if you think there should be a method in one of these classes to +do something, but it's not listed, check the source. Links to the `doxygen +`__ sources are provided to make this as easy as +possible. + +The first section of this document describes general information that is useful +to know when working in the LLVM infrastructure, and the second describes the +Core LLVM classes. In the future this manual will be extended with information +describing how to use extension libraries, such as dominator information, CFG +traversal routines, and useful utilities like the ``InstVisitor`` (`doxygen +`__) template. + +.. _general: + +General Information +=================== + +This section contains general information that is useful if you are working in +the LLVM source-base, but that isn't specific to any particular API. + +.. 
_stl: + +The C++ Standard Template Library +--------------------------------- + +LLVM makes heavy use of the C++ Standard Template Library (STL), perhaps much +more than you are used to, or have seen before. Because of this, you might want +to do a little background reading in the techniques used and capabilities of the +library. There are many good pages that discuss the STL, and several books on +the subject that you can get, so it will not be discussed in this document. + +Here are some useful links: + +#. `cppreference.com + `_ - an excellent + reference for the STL and other parts of the standard C++ library. + +#. `C++ In a Nutshell `_ - This is an O'Reilly + book in the making. It has a decent Standard Library Reference that rivals + Dinkumware's, and is unfortunately no longer free since the book has been + published. + +#. `C++ Frequently Asked Questions `_. + +#. `SGI's STL Programmer's Guide `_ - Contains a + useful `Introduction to the STL + `_. + +#. `Bjarne Stroustrup's C++ Page + `_. + +#. `Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0 + (even better, get the book) + `_. + +You are also encouraged to take a look at the :doc:`LLVM Coding Standards +` guide which focuses on how to write maintainable code more +than where to put your curly braces. + +.. _resources: + +Other useful references +----------------------- + +#. `Using static and shared libraries across platforms + `_ + +.. _apis: + +Important and useful LLVM APIs +============================== + +Here we highlight some LLVM APIs that are generally useful and good to know +about when writing transformations. + +.. _isa: + +The ``isa<>``, ``cast<>`` and ``dyn_cast<>`` templates +------------------------------------------------------ + +The LLVM source-base makes extensive use of a custom form of RTTI. 
These +templates have many similarities to the C++ ``dynamic_cast<>`` operator, but +they don't have some drawbacks (primarily stemming from the fact that +``dynamic_cast<>`` only works on classes that have a v-table). Because they are +used so often, you must know what they do and how they work. All of these +templates are defined in the ``llvm/Support/Casting.h`` (`doxygen +`__) file (note that you very +rarely have to include this file directly). + +``isa<>``: + The ``isa<>`` operator works exactly like the Java "``instanceof``" operator. + It returns true or false depending on whether a reference or pointer points to + an instance of the specified class. This can be very useful for constraint + checking of various sorts (example below). + +``cast<>``: + The ``cast<>`` operator is a "checked cast" operation. It converts a pointer + or reference from a base class to a derived class, causing an assertion + failure if it is not really an instance of the right type. This should be + used in cases where you have some information that makes you believe that + something is of the right type. An example of the ``isa<>`` and ``cast<>`` + template is: + + .. code-block:: c++ + + static bool isLoopInvariant(const Value *V, const Loop *L) { + if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) + return true; + + // Otherwise, it must be an instruction... + return !L->contains(cast<Instruction>(V)->getParent()); + } + + Note that you should **not** use an ``isa<>`` test followed by a ``cast<>``, + for that use the ``dyn_cast<>`` operator. + +``dyn_cast<>``: + The ``dyn_cast<>`` operator is a "checking cast" operation. It checks to see + if the operand is of the specified type, and if so, returns a pointer to it + (this operator does not work with references). If the operand is not of the + correct type, a null pointer is returned. Thus, this works very much like + the ``dynamic_cast<>`` operator in C++, and should be used in the same + circumstances.
Typically, the ``dyn_cast<>`` operator is used in an ``if`` + statement or some other flow control statement like this: + + .. code-block:: c++ + + if (AllocationInst *AI = dyn_cast<AllocationInst>(Val)) { + // ... + } + + This form of the ``if`` statement effectively combines together a call to + ``isa<>`` and a call to ``cast<>`` into one statement, which is very + convenient. + + Note that the ``dyn_cast<>`` operator, like C++'s ``dynamic_cast<>`` or Java's + ``instanceof`` operator, can be abused. In particular, you should not use big + chained ``if/then/else`` blocks to check for lots of different variants of + classes. If you find yourself wanting to do this, it is much cleaner and more + efficient to use the ``InstVisitor`` class to dispatch over the instruction + type directly. + +``cast_or_null<>``: + The ``cast_or_null<>`` operator works just like the ``cast<>`` operator, + except that it allows for a null pointer as an argument (which it then + propagates). This can sometimes be useful, allowing you to combine several + null checks into one. + +``dyn_cast_or_null<>``: + The ``dyn_cast_or_null<>`` operator works just like the ``dyn_cast<>`` + operator, except that it allows for a null pointer as an argument (which it + then propagates). This can sometimes be useful, allowing you to combine + several null checks into one. + +These five templates can be used with any classes, whether they have a v-table +or not. If you want to add support for these templates, see the document +:doc:`How to set up LLVM-style RTTI for your class hierarchy +<HowToSetUpLLVMStyleRTTI>` + +.. _string_apis: + +Passing strings (the ``StringRef`` and ``Twine`` classes) +--------------------------------------------------------- + +Although LLVM generally does not do much string manipulation, we do have several +important APIs which take strings. Two important examples are the Value class +-- which has names for instructions, functions, etc. -- and the ``StringMap`` +class which is used extensively in LLVM and Clang.
+ +These are generic classes, and they need to be able to accept strings which may +have embedded null characters. Therefore, they cannot simply take a ``const +char *``, and taking a ``const std::string&`` requires clients to perform a heap +allocation which is usually unnecessary. Instead, many LLVM APIs use a +``StringRef`` or a ``const Twine&`` for passing strings efficiently. + +.. _StringRef: + +The ``StringRef`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``StringRef`` data type represents a reference to a constant string (a +character array and a length) and supports the common operations available on +``std::string``, but does not require heap allocation. + +It can be implicitly constructed using a C style null-terminated string, an +``std::string``, or explicitly with a character pointer and length. For +example, the ``StringMap`` find function is declared as: + +.. code-block:: c++ + + iterator find(StringRef Key); + +and clients can call it using any one of: + +.. code-block:: c++ + + Map.find("foo"); // Lookup "foo" + Map.find(std::string("bar")); // Lookup "bar" + Map.find(StringRef("\0baz", 4)); // Lookup "\0baz" + +Similarly, APIs which need to return a string may return a ``StringRef`` +instance, which can be used directly or converted to an ``std::string`` using +the ``str`` member function. See ``llvm/ADT/StringRef.h`` (`doxygen +`__) for more +information. + +You should rarely use the ``StringRef`` class directly: because it contains +pointers to external memory, it is not generally safe to store an instance of the +class (unless you know that the external storage will not be freed). +``StringRef`` is small and pervasive enough in LLVM that it should always be +passed by value. + +The ``Twine`` class +^^^^^^^^^^^^^^^^^^^ + +The ``Twine`` (`doxygen `__) +class is an efficient way for APIs to accept concatenated strings.
For example, +a common LLVM paradigm is to name one instruction based on the name of another +instruction with a suffix, for example: + +.. code-block:: c++ + + New = CmpInst::Create(..., SO->getName() + ".cmp"); + +The ``Twine`` class is effectively a lightweight `rope +`_ which points to +temporary (stack allocated) objects. Twines can be implicitly constructed as +the result of the plus operator applied to strings (i.e., a C string, an +``std::string``, or a ``StringRef``). The twine delays the actual concatenation +of strings until it is actually required, at which point it can be efficiently +rendered directly into a character array. This avoids unnecessary heap +allocation involved in constructing the temporary results of string +concatenation. See ``llvm/ADT/Twine.h`` (`doxygen +`__) and :ref:`here ` +for more information. + +As with a ``StringRef``, ``Twine`` objects point to external memory and should +almost never be stored or mentioned directly. They are intended solely for use +when defining a function which should be able to efficiently accept concatenated +strings. + +.. _DEBUG: + +The ``DEBUG()`` macro and ``-debug`` option +------------------------------------------- + +Often when working on your pass you will put a bunch of debugging printouts and +other code into your pass. After you get it working, you want to remove it, but +you may need it again in the future (to work out new bugs that you run across). + +Naturally, because of this, you don't want to delete the debug printouts, but +you don't want them to always be noisy. A standard compromise is to comment +them out, allowing you to enable them if you need them in the future. + +The ``llvm/Support/Debug.h`` (`doxygen +`__) file provides a macro named +``DEBUG()`` that is a much nicer solution to this problem.
Basically, you can +put arbitrary code into the argument of the ``DEBUG`` macro, and it is only +executed if '``opt``' (or any other tool) is run with the '``-debug``' command +line argument: + +.. code-block:: c++ + + DEBUG(errs() << "I am here!\n"); + +Then you can run your pass like this: + +.. code-block:: none + + $ opt < a.bc > /dev/null -mypass + + $ opt < a.bc > /dev/null -mypass -debug + I am here! + +Using the ``DEBUG()`` macro instead of a home-brewed solution allows you to not +have to create "yet another" command line option for the debug output for your +pass. Note that ``DEBUG()`` macros are disabled for optimized builds, so they +do not cause a performance impact at all (for the same reason, they should also +not contain side-effects!). + +One additional nice thing about the ``DEBUG()`` macro is that you can enable or +disable it directly in gdb. Just use "``set DebugFlag=0``" or "``set +DebugFlag=1``" from gdb if the program is running. If the program hasn't +been started yet, you can always just run it with ``-debug``. + +.. _DEBUG_TYPE: + +Fine grained debug info with ``DEBUG_TYPE`` and the ``-debug-only`` option +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometimes you may find yourself in a situation where enabling ``-debug`` just +turns on **too much** information (such as when working on the code generator). +If you want to enable debug information with more fine-grained control, you +should define the ``DEBUG_TYPE`` macro and use the ``-debug-only`` option as +follows: + +.. code-block:: c++ + + #undef DEBUG_TYPE + DEBUG(errs() << "No debug type\n"); + #define DEBUG_TYPE "foo" + DEBUG(errs() << "'foo' debug type\n"); + #undef DEBUG_TYPE + #define DEBUG_TYPE "bar" + DEBUG(errs() << "'bar' debug type\n"); + #undef DEBUG_TYPE + #define DEBUG_TYPE "" + DEBUG(errs() << "No debug type (2)\n"); + +Then you can run your pass like this: + +..
code-block:: none + + $ opt < a.bc > /dev/null -mypass + + $ opt < a.bc > /dev/null -mypass -debug + No debug type + 'foo' debug type + 'bar' debug type + No debug type (2) + $ opt < a.bc > /dev/null -mypass -debug-only=foo + 'foo' debug type + $ opt < a.bc > /dev/null -mypass -debug-only=bar + 'bar' debug type + +Of course, in practice, you should only set ``DEBUG_TYPE`` at the top of a file, +to specify the debug type for the entire module (if you do this before you +``#include "llvm/Support/Debug.h"``, you don't have to insert the ugly +``#undef``'s). Also, you should use names more meaningful than "foo" and "bar", +because there is no system in place to ensure that names do not conflict. If +two different modules use the same string, they will all be turned on when the +name is specified. This allows, for example, all debug information for +instruction scheduling to be enabled with ``-debug-only=InstrSched``, even if +the source lives in multiple files. + +The ``DEBUG_WITH_TYPE`` macro is also available for situations where you would +like to set ``DEBUG_TYPE``, but only for one specific ``DEBUG`` statement. It +takes an additional first parameter, which is the type to use. For example, the +preceding example could be written as: + +.. code-block:: c++ + + DEBUG_WITH_TYPE("", errs() << "No debug type\n"); + DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n"); + DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n"); + DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n"); + +.. _Statistic: + +The ``Statistic`` class & ``-stats`` option +------------------------------------------- + +The ``llvm/ADT/Statistic.h`` (`doxygen +`__) file provides a class +named ``Statistic`` that is used as a unified way to keep track of what the LLVM +compiler is doing and how effective various optimizations are. It is useful to +see what optimizations are contributing to making a particular program run +faster.
+ +Often you may run your pass on some big program, and you're interested to see +how many times it makes a certain transformation. Although you can do this with +hand inspection, or some ad-hoc method, this is a real pain and not very useful +for big programs. Using the ``Statistic`` class makes it very easy to keep +track of this information, and the calculated information is presented in a +uniform manner with the rest of the passes being executed. + +There are many examples of ``Statistic`` uses, but the basics of using it are as +follows: + +#. Define your statistic like this: + + .. code-block:: c++ + + #define DEBUG_TYPE "mypassname" // This goes before any #includes. + STATISTIC(NumXForms, "The # of times I did stuff"); + + The ``STATISTIC`` macro defines a static variable, whose name is specified by + the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and + the description is taken from the second argument. The variable defined + ("NumXForms" in this case) acts like an unsigned integer. + +#. Whenever you make a transformation, bump the counter: + + .. code-block:: c++ + + ++NumXForms; // I did stuff! + +That's all you have to do. To get '``opt``' to print out the statistics +gathered, use the '``-stats``' option: + +.. code-block:: none + + $ opt -stats -mypassname < program.bc > /dev/null + ... statistics output ... + +When running ``opt`` on a C file from the SPEC benchmark suite, it gives a +report that looks like this: + +.. 
code-block:: none + + 7646 bitcodewriter - Number of normal instructions + 725 bitcodewriter - Number of oversized instructions + 129996 bitcodewriter - Number of bitcode bytes written + 2817 raise - Number of insts DCEd or constprop'd + 3213 raise - Number of cast-of-self removed + 5046 raise - Number of expression trees converted + 75 raise - Number of other getelementptr's formed + 138 raise - Number of load/store peepholes + 42 deadtypeelim - Number of unused typenames removed from symtab + 392 funcresolve - Number of varargs functions resolved + 27 globaldce - Number of global variables removed + 2 adce - Number of basic blocks removed + 134 cee - Number of branches revectored + 49 cee - Number of setcc instruction eliminated + 532 gcse - Number of loads removed + 2919 gcse - Number of instructions removed + 86 indvars - Number of canonical indvars added + 87 indvars - Number of aux indvars removed + 25 instcombine - Number of dead inst eliminate + 434 instcombine - Number of insts combined + 248 licm - Number of load insts hoisted + 1298 licm - Number of insts hoisted to a loop pre-header + 3 licm - Number of insts hoisted to multiple loop preds (bad, no loop pre-header) + 75 mem2reg - Number of alloca's promoted + 1444 cfgsimplify - Number of blocks simplified + +Obviously, with so many optimizations, having a unified framework for this stuff +is very nice. Making your pass fit well into the framework makes it more +maintainable and useful. + +.. _ViewGraph: + +Viewing graphs while debugging code +----------------------------------- + +Several of the important data structures in LLVM are graphs: for example CFGs +made out of LLVM :ref:`BasicBlocks `, CFGs made out of LLVM +:ref:`MachineBasicBlocks `, and :ref:`Instruction Selection +DAGs `. In many cases, while debugging various parts of the +compiler, it is nice to instantly visualize these graphs. + +LLVM provides several callbacks that are available in a debug build to do +exactly that. 
If you call the ``Function::viewCFG()`` method, for example, the +current LLVM tool will pop up a window containing the CFG for the function where +each basic block is a node in the graph, and each node contains the instructions +in the block. Similarly, there also exists ``Function::viewCFGOnly()`` (does +not include the instructions), the ``MachineFunction::viewCFG()`` and +``MachineFunction::viewCFGOnly()``, and the ``SelectionDAG::viewGraph()`` +methods. Within GDB, for example, you can usually use something like ``call +DAG.viewGraph()`` to pop up a window. Alternatively, you can sprinkle calls to +these functions in your code in places you want to debug. + +Getting this to work requires a small amount of configuration. On Unix systems +with X11, install the `graphviz `_ toolkit, and make +sure 'dot' and 'gv' are in your path. If you are running on Mac OS/X, download +and install the Mac OS/X `Graphviz program +`_ and add +``/Applications/Graphviz.app/Contents/MacOS/`` (or wherever you install it) to +your path. Once your system and path are set up, rerun the LLVM configure +script and rebuild LLVM to enable this functionality. + +``SelectionDAG`` has been extended to make it easier to locate *interesting* +nodes in large complex graphs. From gdb, if you ``call DAG.setGraphColor(node, +"color")``, then the next ``call DAG.viewGraph()`` would highlight the node in +the specified color (choices of colors can be found at `colors +`_.) More complex node attributes +can be provided with ``call DAG.setGraphAttrs(node, "attributes")`` (choices can +be found at `Graph attributes `_.) +If you want to restart and clear all the current graph attributes, then you can +``call DAG.clearGraphAttrs()``. + +Note that graph visualization features are compiled out of Release builds to +reduce file size. This means that you need a Debug+Asserts or Release+Asserts +build to use these features. + +..
_datastructure: + +Picking the Right Data Structure for a Task +=========================================== + +LLVM has a plethora of data structures in the ``llvm/ADT/`` directory, and we +commonly use STL data structures. This section describes the trade-offs you +should consider when you pick one. + +The first step is a choose your own adventure: do you want a sequential +container, a set-like container, or a map-like container? The most important +thing when choosing a container is the algorithmic properties of how you plan to +access the container. Based on that, you should use: + + +* a :ref:`map-like ` container if you need efficient look-up of a + value based on another value. Map-like containers also support efficient + queries for containment (whether a key is in the map). Map-like containers + generally do not support efficient reverse mapping (values to keys). If you + need that, use two maps. Some map-like containers also support efficient + iteration through the keys in sorted order. Map-like containers are the most + expensive sort, only use them if you need one of these capabilities. + +* a :ref:`set-like ` container if you need to put a bunch of stuff into + a container that automatically eliminates duplicates. Some set-like + containers support efficient iteration through the elements in sorted order. + Set-like containers are more expensive than sequential containers. + +* a :ref:`sequential ` container provides the most efficient way + to add elements and keeps track of the order they are added to the collection. + They permit duplicates and support efficient iteration, but do not support + efficient look-up based on a key. + +* a :ref:`string ` container is a specialized sequential container or + reference structure that is used for character or byte arrays. + +* a :ref:`bit ` container provides an efficient way to store and + perform set operations on sets of numeric id's, while automatically + eliminating duplicates. 
Bit containers require a maximum of 1 bit for each + identifier you want to store. + +Once the proper category of container is determined, you can fine tune the +memory use, constant factors, and cache behaviors of access by intelligently +picking a member of the category. Note that constant factors and cache behavior +can be a big deal. If you have a vector that usually only contains a few +elements (but could contain many), for example, it's much better to use +:ref:`SmallVector ` than :ref:`vector `. Doing so +avoids (relatively) expensive malloc/free calls, which dwarf the cost of adding +the elements to the container. + +.. _ds_sequential: + +Sequential Containers (std::vector, std::list, etc) +--------------------------------------------------- + +There are a variety of sequential containers available for you, based on your +needs. Pick the first in this section that will do what you want. + +.. _dss_arrayref: + +llvm/ADT/ArrayRef.h +^^^^^^^^^^^^^^^^^^^ + +The ``llvm::ArrayRef`` class is the preferred class to use in an interface that +accepts a sequential list of elements in memory and just reads from them. By +taking an ``ArrayRef``, the API can be passed a fixed size array, an +``std::vector``, an ``llvm::SmallVector`` and anything else that is contiguous +in memory. + +.. _dss_fixedarrays: + +Fixed Size Arrays +^^^^^^^^^^^^^^^^^ + +Fixed size arrays are very simple and very fast. They are good if you know +exactly how many elements you have, or you have a (low) upper bound on how many +you have. + +.. _dss_heaparrays: + +Heap Allocated Arrays +^^^^^^^^^^^^^^^^^^^^^ + +Heap allocated arrays (``new[]`` + ``delete[]``) are also simple. They are good +if the number of elements is variable, if you know how many elements you will +need before the array is allocated, and if the array is usually large (if not, +consider a :ref:`SmallVector `). The cost of a heap allocated +array is the cost of the new/delete (aka malloc/free). 
Also note that if you +are allocating an array of a type with a constructor, the constructor and +destructors will be run for every element in the array (re-sizable vectors only +construct those elements actually used). + +.. _dss_tinyptrvector: + +llvm/ADT/TinyPtrVector.h +^^^^^^^^^^^^^^^^^^^^^^^^ + +``TinyPtrVector`` is a highly specialized collection class that is +optimized to avoid allocation in the case when a vector has zero or one +elements. It has two major restrictions: 1) it can only hold values of pointer +type, and 2) it cannot hold a null pointer. + +Since this container is highly specialized, it is rarely used. + +.. _dss_smallvector: + +llvm/ADT/SmallVector.h +^^^^^^^^^^^^^^^^^^^^^^ + +``SmallVector`` is a simple class that looks and smells just like +``vector``: it supports efficient iteration, lays out elements in memory +order (so you can do pointer arithmetic between elements), supports efficient +push_back/pop_back operations, supports efficient random access to its elements, +etc. + +The advantage of SmallVector is that it allocates space for some number of +elements (N) **in the object itself**. Because of this, if the SmallVector is +dynamically smaller than N, no malloc is performed. This can be a big win in +cases where the malloc/free call is far more expensive than the code that +fiddles around with the elements. + +This is good for vectors that are "usually small" (e.g. the number of +predecessors/successors of a block is usually less than 8). On the other hand, +this makes the size of the SmallVector itself large, so you don't want to +allocate lots of them (doing so will waste a lot of space). As such, +SmallVectors are most useful when on the stack. + +SmallVector also provides a nice portable and efficient replacement for +``alloca``. + +.. note:: + + Prefer to use ``SmallVectorImpl`` as a parameter type. 
+ + In APIs that don't care about the "small size" (most?), prefer to use + the ``SmallVectorImpl<T>`` class, which is basically just the "vector + header" (and methods) without the elements allocated after it. Note that + ``SmallVector<T, N>`` inherits from ``SmallVectorImpl<T>`` so the + conversion is implicit and costs nothing. E.g. + + .. code-block:: c++ + + // BAD: Clients cannot pass e.g. SmallVector<Foo, 4>. + hardcodedSmallSize(SmallVector<Foo, 2> &Out); + // GOOD: Clients can pass any SmallVector<Foo, N>. + allowsAnySmallSize(SmallVectorImpl<Foo> &Out); + + void someFunc() { + SmallVector<Foo, 8> Vec; + hardcodedSmallSize(Vec); // Error. + allowsAnySmallSize(Vec); // Works. + } + + Even though it has "``Impl``" in the name, this is so widely used that + it really isn't "private to the implementation" anymore. A name like + ``SmallVectorHeader`` would be more appropriate. + +.. _dss_vector: + +<vector> +^^^^^^^^ + +``std::vector`` is well loved and respected. It is useful when SmallVector +isn't: when the size of the vector is often large (thus the small optimization +will rarely be a benefit) or if you will be allocating many instances of the +vector itself (which would waste space for elements that aren't in the +container). vector is also useful when interfacing with code that expects +vectors :). + +One worthwhile note about std::vector: avoid code like this: + +.. code-block:: c++ + + for ( ... ) { + std::vector<foo> V; + // make use of V. + } + +Instead, write this as: + +.. code-block:: c++ + + std::vector<foo> V; + for ( ... ) { + // make use of V. + V.clear(); + } + +Doing so will save (at least) one heap allocation and free per iteration of the +loop. + +.. _dss_deque: + +<deque> +^^^^^^^ + +``std::deque`` is, in some senses, a generalized version of ``std::vector``. +Like ``std::vector``, it provides constant time random access and other similar +properties, but it also provides efficient access to the front of the list. It +does not guarantee contiguity of elements within memory.
+ +In exchange for this extra flexibility, ``std::deque`` has significantly higher +constant factor costs than ``std::vector``. If possible, use ``std::vector`` or +something cheaper. + +.. _dss_list: + +<list> +^^^^^^ + +``std::list`` is an extremely inefficient class that is rarely useful. It +performs a heap allocation for every element inserted into it, thus having an +extremely high constant factor, particularly for small data types. +``std::list`` also only supports bidirectional iteration, not random access +iteration. + +In exchange for this high cost, std::list supports efficient access to both ends +of the list (like ``std::deque``, but unlike ``std::vector`` or +``SmallVector``). In addition, the iterator invalidation characteristics of +std::list are stronger than those of a vector class: inserting an element into +the list or removing one from it does not invalidate iterators or pointers to other +elements in the list. + +.. _dss_ilist: + +llvm/ADT/ilist.h +^^^^^^^^^^^^^^^^ + +``ilist`` implements an 'intrusive' doubly-linked list. It is intrusive, +because it requires the element to store and provide access to the prev/next +pointers for the list. + +``ilist`` has the same drawbacks as ``std::list``, and additionally requires an +``ilist_traits`` implementation for the element type, but it provides some novel +characteristics. In particular, it can efficiently store polymorphic objects, +the traits class is informed when an element is inserted or removed from the +list, and ``ilist``\ s are guaranteed to support a constant-time splice +operation. + +These properties are exactly what we want for things like ``Instruction``\ s and +basic blocks, which is why these are implemented with ``ilist``\ s. + +Related classes of interest are explained in the following subsections: + +* :ref:`ilist_traits <dss_ilist_traits>` + +* :ref:`iplist <dss_iplist>` + +* :ref:`llvm/ADT/ilist_node.h <dss_ilist_node>` + +* :ref:`Sentinels <dss_ilist_sentinel>` + +..
_dss_packedvector: + +llvm/ADT/PackedVector.h +^^^^^^^^^^^^^^^^^^^^^^^ + +Useful for storing a vector of values using only a small number of bits for each +value. Apart from the standard operations of a vector-like container, it can +also perform an 'or' set operation. + +For example: + +.. code-block:: c++ + + enum State { + None = 0x0, + FirstCondition = 0x1, + SecondCondition = 0x2, + Both = 0x3 + }; + + State get() { + PackedVector<State, 2> Vec1; + Vec1.push_back(FirstCondition); + + PackedVector<State, 2> Vec2; + Vec2.push_back(SecondCondition); + + Vec1 |= Vec2; + return Vec1[0]; // returns 'Both'. + } + +.. _dss_ilist_traits: + +ilist_traits +^^^^^^^^^^^^ + +``ilist_traits`` is ``ilist``'s customization mechanism. ``iplist`` +(and consequently ``ilist``) publicly derive from this traits class. + +.. _dss_iplist: + +iplist +^^^^^^ + +``iplist`` is ``ilist``'s base and as such supports a slightly narrower +interface. Notably, inserters from ``T&`` are absent. + +``ilist_traits`` is a public base of this class and can be used for a wide +variety of customizations. + +.. _dss_ilist_node: + +llvm/ADT/ilist_node.h +^^^^^^^^^^^^^^^^^^^^^ + +``ilist_node`` implements the forward and backward links that are expected +by the ``ilist`` (and analogous containers) in the default manner. + +``ilist_node``\ s are meant to be embedded in the node type ``T``, usually +``T`` publicly derives from ``ilist_node``. + +.. _dss_ilist_sentinel: + +Sentinels +^^^^^^^^^ + +``ilist``\ s have another specialty that must be considered. To be a good +citizen in the C++ ecosystem, it needs to support the standard container +operations, such as ``begin`` and ``end`` iterators, etc. Also, the +``operator--`` must work correctly on the ``end`` iterator in the case of +non-empty ``ilist``\ s. + +The only sensible solution to this problem is to allocate a so-called *sentinel* +along with the intrusive list, which serves as the ``end`` iterator, providing +the back-link to the last element.
However, conforming to the C++ convention, it +is illegal to ``operator++`` beyond the sentinel and it also must not be +dereferenced. + +These constraints allow for some implementation freedom to the ``ilist`` how to +allocate and store the sentinel. The corresponding policy is dictated by +``ilist_traits``. By default a ``T`` gets heap-allocated whenever the need +for a sentinel arises. + +While the default policy is sufficient in most cases, it may break down when +``T`` does not provide a default constructor. Also, in the case of many +instances of ``ilist``\ s, the memory overhead of the associated sentinels is +wasted. To alleviate the situation with numerous and voluminous +``T``-sentinels, sometimes a trick is employed, leading to *ghostly sentinels*. + +Ghostly sentinels are obtained by specially-crafted ``ilist_traits`` which +superpose the sentinel with the ``ilist`` instance in memory. Pointer +arithmetic is used to obtain the sentinel, which is relative to the ``ilist``'s +``this`` pointer. The ``ilist`` is augmented by an extra pointer, which serves +as the back-link of the sentinel. This is the only field in the ghostly +sentinel which can be legally accessed. + +.. _dss_other: + +Other Sequential Container options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Other STL containers are available, such as ``std::string``. + +There are also various STL adapter classes such as ``std::queue``, +``std::priority_queue``, ``std::stack``, etc. These provide simplified access +to an underlying container but don't affect the cost of the container itself. + +.. _ds_string: + +String-like containers +---------------------- + +There are a variety of ways to pass around and use strings in C and C++, and +LLVM adds a few new options to choose from. Pick the first option on this list +that will do what you need, they are ordered according to their relative cost. + +Note that it is generally preferred to *not* pass strings around as ``const +char*``'s.
These have a number of problems, including the fact that they +cannot represent embedded nul ("\0") characters, and do not have a length +available efficiently. The general replacement for '``const char*``' is +StringRef. + +For more information on choosing string containers for APIs, please see +:ref:`Passing Strings `. + +.. _dss_stringref: + +llvm/ADT/StringRef.h +^^^^^^^^^^^^^^^^^^^^ + +The StringRef class is a simple value class that contains a pointer to a +character and a length, and is quite related to the :ref:`ArrayRef +` class (but specialized for arrays of characters). Because +StringRef carries a length with it, it safely handles strings with embedded nul +characters in it, getting the length does not require a strlen call, and it even +has very convenient APIs for slicing and dicing the character range that it +represents. + +StringRef is ideal for passing simple strings around that are known to be live, +either because they are C string literals, std::string, a C array, or a +SmallVector. Each of these cases has an efficient implicit conversion to +StringRef, which doesn't result in a dynamic strlen being executed. + +StringRef has a few major limitations which make more powerful string containers +useful: + +#. You cannot directly convert a StringRef to a 'const char*' because there is + no way to add a trailing nul (unlike the .c_str() method on various stronger + classes). + +#. StringRef doesn't own or keep alive the underlying string bytes. + As such it can easily lead to dangling pointers, and is not suitable for + embedding in datastructures in most cases (instead, use an std::string or + something like that). + +#. For the same reason, StringRef cannot be used as the return value of a + method if the method "computes" the result string. Instead, use std::string. + +#. StringRef's do not allow you to mutate the pointed-to string bytes and it + doesn't allow you to insert or remove bytes from the range. 
For editing + operations like this, it interoperates with the :ref:`Twine ` + class. + +Because of its strengths and limitations, it is very common for a function to +take a StringRef and for a method on an object to return a StringRef that points +into some string that it owns. + +.. _dss_twine: + +llvm/ADT/Twine.h +^^^^^^^^^^^^^^^^ + +The Twine class is used as an intermediary datatype for APIs that want to take a +string that can be constructed inline with a series of concatenations. Twine +works by forming recursive instances of the Twine datatype (a simple value +object) on the stack as temporary objects, linking them together into a tree +which is then linearized when the Twine is consumed. Twine is only safe to use +as the argument to a function, and should always be a const reference, e.g.: + +.. code-block:: c++ + + void foo(const Twine &T); + ... + StringRef X = ... + unsigned i = ... + foo(X + "." + Twine(i)); + +This example forms a string like "blarg.42" by concatenating the values +together, and does not form intermediate strings containing "blarg" or "blarg.". + +Because Twine is constructed with temporary objects on the stack, and because +these instances are destroyed at the end of the current statement, it is an +inherently dangerous API. For example, this simple variant contains undefined +behavior and will probably crash: + +.. code-block:: c++ + + void foo(const Twine &T); + ... + StringRef X = ... + unsigned i = ... + const Twine &Tmp = X + "." + Twine(i); + foo(Tmp); + +... because the temporaries are destroyed before the call. That said, Twine's +are much more efficient than intermediate std::string temporaries, and they work +really well with StringRef. Just be aware of their limitations. + +.. _dss_smallstring: + +llvm/ADT/SmallString.h +^^^^^^^^^^^^^^^^^^^^^^ + +SmallString is a subclass of :ref:`SmallVector ` that adds some +convenience APIs like += that takes StringRef's. 
SmallString avoids allocating +memory in the case when the preallocated space is enough to hold its data, and +it falls back to general heap allocation when required. Since it owns its data, +it is very safe to use and supports full mutation of the string. + +Like SmallVector's, the big downside to SmallString is their sizeof. While they +are optimized for small strings, they themselves are not particularly small. +This means that they work great for temporary scratch buffers on the stack, but +should not generally be put into the heap: it is very rare to see a SmallString +as the member of a frequently-allocated heap data structure or returned +by-value. + +.. _dss_stdstring: + +std::string +^^^^^^^^^^^ + +The standard C++ std::string class is a very general class that (like +SmallString) owns its underlying data. sizeof(std::string) is very reasonable +so it can be embedded into heap data structures and returned by-value. On the +other hand, std::string is highly inefficient for inline editing (e.g. +concatenating a bunch of stuff together) and because it is provided by the +standard library, its performance characteristics depend a lot on the host +standard library (e.g. libc++ and MSVC provide a highly optimized string class, +GCC contains a really slow implementation). + +The major disadvantage of std::string is that almost every operation that makes +them larger can allocate memory, which is slow. As such, it is better to use +SmallVector or Twine as a scratch buffer, but then use std::string to persist +the result. + +.. _ds_set: + +Set-Like Containers (std::set, SmallSet, SetVector, etc) +-------------------------------------------------------- + +Set-like containers are useful when you need to canonicalize multiple values +into a single representation. There are several different choices for how to do +this, providing various trade-offs. + +..
_dss_sortedvectorset: + +A sorted 'vector' +^^^^^^^^^^^^^^^^^ + +If you intend to insert a lot of elements, then do a lot of queries, a great +approach is to use a vector (or other sequential container) with +std::sort+std::unique to remove duplicates. This approach works really well if +your usage pattern has these two distinct phases (insert then query), and can be +coupled with a good choice of :ref:`sequential container <ds_sequential>`. + +This combination provides several nice properties: the result data is +contiguous in memory (good for cache locality), has few allocations, is easy to +address (iterators in the final vector are just indices or pointers), and can be +efficiently queried with a standard binary search (e.g. +``std::lower_bound``; if you want the whole range of elements comparing +equal, use ``std::equal_range``). + +.. _dss_smallset: + +llvm/ADT/SmallSet.h +^^^^^^^^^^^^^^^^^^^ + +If you have a set-like data structure that is usually small and whose elements +are reasonably small, a ``SmallSet<Type, N>`` is a good choice. This set has +space for N elements in place (thus, if the set is dynamically smaller than N, +no malloc traffic is required) and accesses them with a simple linear search. +When the set grows beyond 'N' elements, it allocates a more expensive +representation that guarantees efficient access (for most types, it falls back +to std::set, but for pointers it uses something far better, :ref:`SmallPtrSet +<dss_smallptrset>`). + +The magic of this class is that it handles small sets extremely efficiently, but +gracefully handles extremely large sets without loss of efficiency. The +drawback is that the interface is quite small: it supports insertion, queries +and erasing, but does not support iteration. + +.. _dss_smallptrset: + +llvm/ADT/SmallPtrSet.h +^^^^^^^^^^^^^^^^^^^^^^ + +SmallPtrSet has all the advantages of ``SmallSet`` (and a ``SmallSet`` of +pointers is transparently implemented with a ``SmallPtrSet``), but also supports +iterators.
If more than 'N' insertions are performed, a single quadratically +probed hash table is allocated and grows as needed, providing extremely +efficient access (constant time insertion/deleting/queries with low constant +factors) and is very stingy with malloc traffic. + +Note that, unlike ``std::set``, the iterators of ``SmallPtrSet`` are invalidated +whenever an insertion occurs. Also, the values visited by the iterators are not +visited in sorted order. + +.. _dss_denseset: + +llvm/ADT/DenseSet.h +^^^^^^^^^^^^^^^^^^^ + +DenseSet is a simple quadratically probed hash table. It excels at supporting +small values: it uses a single allocation to hold all of the pairs that are +currently inserted in the set. DenseSet is a great way to unique small values +that are not simple pointers (use :ref:`SmallPtrSet ` for +pointers). Note that DenseSet has the same requirements for the value type that +:ref:`DenseMap ` has. + +.. _dss_sparseset: + +llvm/ADT/SparseSet.h +^^^^^^^^^^^^^^^^^^^^ + +SparseSet holds a small number of objects identified by unsigned keys of +moderate size. It uses a lot of memory, but provides operations that are almost +as fast as a vector. Typical keys are physical registers, virtual registers, or +numbered basic blocks. + +SparseSet is useful for algorithms that need very fast clear/find/insert/erase +and fast iteration over small sets. It is not intended for building composite +data structures. + +.. _dss_sparsemultiset: + +llvm/ADT/SparseMultiSet.h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +SparseMultiSet adds multiset behavior to SparseSet, while retaining SparseSet's +desirable attributes. Like SparseSet, it typically uses a lot of memory, but +provides operations that are almost as fast as a vector. Typical keys are +physical registers, virtual registers, or numbered basic blocks. + +SparseMultiSet is useful for algorithms that need very fast +clear/find/insert/erase of the entire collection, and iteration over sets of +elements sharing a key. 
It is often a more efficient choice than using composite +data structures (e.g. vector-of-vectors, map-of-vectors). It is not intended for +building composite data structures. + +.. _dss_FoldingSet: + +llvm/ADT/FoldingSet.h +^^^^^^^^^^^^^^^^^^^^^ + +FoldingSet is an aggregate class that is really good at uniquing +expensive-to-create or polymorphic objects. It is a combination of a chained +hash table with intrusive links (uniqued objects are required to inherit from +FoldingSetNode) that uses :ref:`SmallVector ` as part of its ID +process. + +Consider a case where you want to implement a "getOrCreateFoo" method for a +complex object (for example, a node in the code generator). The client has a +description of **what** it wants to generate (it knows the opcode and all the +operands), but we don't want to 'new' a node, then try inserting it into a set +only to find out it already exists, at which point we would have to delete it +and return the node that already exists. + +To support this style of client, FoldingSet performs a query with a +FoldingSetNodeID (which wraps SmallVector) that can be used to describe the +element that we want to query for. The query either returns the element +matching the ID or it returns an opaque ID that indicates where insertion should +take place. Construction of the ID usually does not require heap traffic. + +Because FoldingSet uses intrusive links, it can support polymorphic objects in +the set (for example, you can have SDNode instances mixed with LoadSDNodes). +Because the elements are individually allocated, pointers to the elements are +stable: inserting or removing elements does not invalidate any pointers to other +elements. + +.. _dss_set: + +<set> +^^^^^ + +``std::set`` is a reasonable all-around set class, which is decent at many +things but great at nothing.
std::set allocates memory for each element +inserted (thus it is very malloc intensive) and typically stores three pointers +per element in the set (thus adding a large amount of per-element space +overhead). It offers guaranteed log(n) performance, which is not particularly +fast from a complexity standpoint (particularly if the elements of the set are +expensive to compare, like strings), and has extremely high constant factors for +lookup, insertion and removal. + +The advantages of std::set are that its iterators are stable (deleting or +inserting an element from the set does not affect iterators or pointers to other +elements) and that iteration over the set is guaranteed to be in sorted order. +If the elements in the set are large, then the relative overhead of the pointers +and malloc traffic is not a big deal, but if the elements of the set are small, +std::set is almost never a good choice. + +.. _dss_setvector: + +llvm/ADT/SetVector.h +^^^^^^^^^^^^^^^^^^^^ + +LLVM's ``SetVector`` is an adapter class that combines your choice of a +set-like container along with a :ref:`Sequential Container ` The +important property that this provides is efficient insertion with uniquing +(duplicate elements are ignored) with iteration support. It implements this by +inserting elements into both a set-like container and the sequential container, +using the set-like container for uniquing and the sequential container for +iteration. + +The difference between SetVector and other sets is that the order of iteration +is guaranteed to match the order of insertion into the SetVector. This property +is really important for things like sets of pointers. Because pointer values +are non-deterministic (e.g. vary across runs of the program on different +machines), iterating over the pointers in the set will not be in a well-defined +order. 
+ +The drawback of SetVector is that it requires twice as much space as a normal +set and has the sum of constant factors from the set-like container and the +sequential container that it uses. Use it **only** if you need to iterate over +the elements in a deterministic order. SetVector is also expensive to delete +elements out of (linear time), unless you use its "pop_back" method, which is +faster. + +``SetVector`` is an adapter class that defaults to using ``std::vector`` and a +size 16 ``SmallSet`` for the underlying containers, so it is quite expensive. +However, ``"llvm/ADT/SetVector.h"`` also provides a ``SmallSetVector`` class, +which defaults to using a ``SmallVector`` and ``SmallSet`` of a specified size. +If you use this, and if your sets are dynamically smaller than ``N``, you will +save a lot of heap traffic. + +.. _dss_uniquevector: + +llvm/ADT/UniqueVector.h +^^^^^^^^^^^^^^^^^^^^^^^ + +UniqueVector is similar to :ref:`SetVector <dss_setvector>` but it retains a +unique ID for each element inserted into the set. It internally contains a map +and a vector, and it assigns a unique ID for each value inserted into the set. + +UniqueVector is very expensive: its cost is the sum of the cost of maintaining +both the map and vector, it has high complexity, high constant factors, and +produces a lot of malloc traffic. It should be avoided. + +.. _dss_immutableset: + +llvm/ADT/ImmutableSet.h +^^^^^^^^^^^^^^^^^^^^^^^ + +ImmutableSet is an immutable (functional) set implementation based on an AVL +tree. Adding or removing elements is done through a Factory object and results +in the creation of a new ImmutableSet object. If an ImmutableSet already exists +with the given contents, then the existing one is returned; equality is compared +with a FoldingSetNodeID. The time and space complexity of add or remove +operations is logarithmic in the size of the original set. + +There is no method for returning an element of the set, you can only check for +membership. + +..
_dss_otherset: + +Other Set-Like Container Options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The STL provides several other options, such as std::multiset and the various +"hash_set" like containers (whether from C++ TR1 or from the SGI library). We +never use hash_set and unordered_set because they are generally very expensive +(each insertion requires a malloc) and very non-portable. + +std::multiset is useful if you're not interested in elimination of duplicates, +but has all the drawbacks of std::set. A sorted vector (where you don't delete +duplicate entries) or some other approach is almost always better. + +.. _ds_map: + +Map-Like Containers (std::map, DenseMap, etc) +--------------------------------------------- + +Map-like containers are useful when you want to associate data to a key. As +usual, there are a lot of different ways to do this. :) + +.. _dss_sortedvectormap: + +A sorted 'vector' +^^^^^^^^^^^^^^^^^ + +If your usage pattern follows a strict insert-then-query approach, you can +trivially use the same approach as :ref:`sorted vectors for set-like containers +`. The only difference is that your query function (which +uses std::lower_bound to get efficient log(n) lookup) should only compare the +key, not both the key and value. This yields the same advantages as sorted +vectors for sets. + +.. _dss_stringmap: + +llvm/ADT/StringMap.h +^^^^^^^^^^^^^^^^^^^^ + +Strings are commonly used as keys in maps, and they are difficult to support +efficiently: they are variable length, inefficient to hash and compare when +long, expensive to copy, etc. StringMap is a specialized container designed to +cope with these issues. It supports mapping an arbitrary range of bytes to an +arbitrary other object. + +The StringMap implementation uses a quadratically-probed hash table, where the +buckets store a pointer to the heap allocated entries (and some other stuff). +The entries in the map must be heap allocated because the strings are variable +length. 
The string data (key) and the element object (value) are stored in the +same allocation with the string data immediately after the element object. +This container guarantees the "``(char*)(&Value+1)``" points to the key string +for a value. + +The StringMap is very fast for several reasons: quadratic probing is very cache +efficient for lookups, the hash value of strings in buckets is not recomputed +when looking up an element, StringMap rarely has to touch the memory for +unrelated objects when looking up a value (even when hash collisions happen), +hash table growth does not recompute the hash values for strings already in the +table, and each pair in the map is stored in a single allocation (the string data +is stored in the same allocation as the Value of a pair). + +StringMap also provides query methods that take byte ranges, so it only ever +copies a string if a value is inserted into the table. + +StringMap iteration order, however, is not guaranteed to be deterministic, so +any uses which require that should instead use a std::map. + +.. _dss_indexmap: + +llvm/ADT/IndexedMap.h +^^^^^^^^^^^^^^^^^^^^^ + +IndexedMap is a specialized container for mapping small dense integers (or +values that can be mapped to small dense integers) to some other type. It is +internally implemented as a vector with a mapping function that maps the keys +to the dense integer range. + +This is useful for cases like virtual registers in the LLVM code generator: they +have a dense mapping that is offset by a compile-time constant (the first +virtual register ID). + +.. _dss_densemap: + +llvm/ADT/DenseMap.h +^^^^^^^^^^^^^^^^^^^ + +DenseMap is a simple quadratically probed hash table. It excels at supporting +small keys and values: it uses a single allocation to hold all of the pairs +that are currently inserted in the map. DenseMap is a great way to map +pointers to pointers, or map other small types to each other.
+ +There are several aspects of DenseMap that you should be aware of, however. +The iterators in a DenseMap are invalidated whenever an insertion occurs, +unlike map. Also, because DenseMap allocates space for a large number of +key/value pairs (it starts with 64 by default), it will waste a lot of space if +your keys or values are large. Finally, you must implement a partial +specialization of DenseMapInfo for the key that you want, if it isn't already +supported. This is required to tell DenseMap about two special marker values +(which can never be inserted into the map) that it needs internally. + +DenseMap's find_as() method supports lookup operations using an alternate key +type. This is useful in cases where the normal key type is expensive to +construct, but cheap to compare against. The DenseMapInfo is responsible for +defining the appropriate comparison and hashing methods for each alternate key +type used. + +.. _dss_valuemap: + +llvm/ADT/ValueMap.h +^^^^^^^^^^^^^^^^^^^ + +ValueMap is a wrapper around a :ref:`DenseMap <dss_densemap>` mapping +``Value*``\ s (or subclasses) to another type. When a Value is deleted or +RAUW'ed, ValueMap will update itself so the new version of the key is mapped to +the same value, just as if the key were a WeakVH. You can configure exactly how +this happens, and what else happens on these two events, by passing a ``Config`` +parameter to the ValueMap template. + +.. _dss_intervalmap: + +llvm/ADT/IntervalMap.h +^^^^^^^^^^^^^^^^^^^^^^ + +IntervalMap is a compact map for small keys and values. It maps key intervals +instead of single keys, and it will automatically coalesce adjacent intervals. +When the map only contains a few intervals, they are stored in the map object +itself to avoid allocations. + +The IntervalMap iterators are quite big, so they should not be passed around as +STL iterators. The heavyweight iterators allow a smaller data structure. + +..
_dss_map: + + +^^^^^ + +std::map has similar characteristics to :ref:`std::set `: it uses a +single allocation per pair inserted into the map, it offers log(n) lookup with +an extremely large constant factor, imposes a space penalty of 3 pointers per +pair in the map, etc. + +std::map is most useful when your keys or values are very large, if you need to +iterate over the collection in sorted order, or if you need stable iterators +into the map (i.e. they don't get invalidated if an insertion or deletion of +another element takes place). + +.. _dss_mapvector: + +llvm/ADT/MapVector.h +^^^^^^^^^^^^^^^^^^^^ + +``MapVector`` provides a subset of the DenseMap interface. The +main difference is that the iteration order is guaranteed to be the insertion +order, making it an easy (but somewhat expensive) solution for non-deterministic +iteration over maps of pointers. + +It is implemented by mapping from key to an index in a vector of key,value +pairs. This provides fast lookup and iteration, but has two main drawbacks: The +key is stored twice and it doesn't support removing elements. + +.. _dss_inteqclasses: + +llvm/ADT/IntEqClasses.h +^^^^^^^^^^^^^^^^^^^^^^^ + +IntEqClasses provides a compact representation of equivalence classes of small +integers. Initially, each integer in the range 0..n-1 has its own equivalence +class. Classes can be joined by passing two class representatives to the +join(a, b) method. Two integers are in the same class when findLeader() returns +the same representative. + +Once all equivalence classes are formed, the map can be compressed so each +integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m +is the total number of equivalence classes. The map must be uncompressed before +it can be edited again. + +.. _dss_immutablemap: + +llvm/ADT/ImmutableMap.h +^^^^^^^^^^^^^^^^^^^^^^^ + +ImmutableMap is an immutable (functional) map implementation based on an AVL +tree. 
Adding or removing elements is done through a Factory object and results +in the creation of a new ImmutableMap object. If an ImmutableMap already exists +with the given key set, then the existing one is returned; equality is compared +with a FoldingSetNodeID. The time and space complexity of add or remove +operations is logarithmic in the size of the original map. + +.. _dss_othermap: + +Other Map-Like Container Options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The STL provides several other options, such as std::multimap and the various +"hash_map" like containers (whether from C++ TR1 or from the SGI library). We +never use hash_map and unordered_map because they are generally very expensive +(each insertion requires a malloc) and very non-portable. + +std::multimap is useful if you want to map a key to multiple values, but has all +the drawbacks of std::map. A sorted vector or some other approach is almost +always better. + +.. _ds_bit: + +Bit storage containers (BitVector, SparseBitVector) +--------------------------------------------------- + +Unlike the other containers, there are only two bit storage containers, and +choosing when to use each is relatively straightforward. + +One additional option is ``std::vector<bool>``: we discourage its use for two +reasons 1) the implementation in many common compilers (e.g. commonly +available versions of GCC) is extremely inefficient and 2) the C++ standards +committee is likely to deprecate this container and/or change it significantly +somehow. In any case, please don't use it. + +.. _dss_bitvector: + +BitVector +^^^^^^^^^ + +The BitVector container provides a dynamic size set of bits for manipulation. +It supports individual bit setting/testing, as well as set operations. The set +operations take time O(size of bitvector), but operations are performed one word +at a time, instead of one bit at a time. This makes the BitVector very fast for +set operations compared to other containers.
Use the BitVector when you expect +the number of set bits to be high (i.e. a dense set). + +.. _dss_smallbitvector: + +SmallBitVector +^^^^^^^^^^^^^^ + +The SmallBitVector container provides the same interface as BitVector, but it is +optimized for the case where only a small number of bits, less than 25 or so, +are needed. It also transparently supports larger bit counts, but slightly less +efficiently than a plain BitVector, so SmallBitVector should only be used when +larger counts are rare. + +At this time, SmallBitVector does not support set operations (and, or, xor), and +its operator[] does not provide an assignable lvalue. + +.. _dss_sparsebitvector: + +SparseBitVector +^^^^^^^^^^^^^^^ + +The SparseBitVector container is much like BitVector, with one major difference: +Only the bits that are set, are stored. This makes the SparseBitVector much +more space efficient than BitVector when the set is sparse, as well as making +set operations O(number of set bits) instead of O(size of universe). The +downside to the SparseBitVector is that setting and testing of random bits is +O(N), and on large SparseBitVectors, this can be slower than BitVector. In our +implementation, setting or testing bits in sorted order (either forwards or +reverse) is O(1) worst case. Testing and setting bits within 128 bits (depends +on size) of the current bit is also O(1). As a general statement, +testing/setting bits in a SparseBitVector is O(distance away from last set bit). + +.. _common: + +Helpful Hints for Common Operations +=================================== + +This section describes how to perform some very simple transformations of LLVM +code. This is meant to give examples of common idioms used, showing the +practical side of LLVM transformations. + +Because this is a "how-to" section, you should also read about the main classes +that you will be working with. 
The :ref:`Core LLVM Class Hierarchy Reference +` contains details and descriptions of the main classes that you +should know about. + +.. _inspection: + +Basic Inspection and Traversal Routines +--------------------------------------- + +The LLVM compiler infrastructure has many different data structures that may be +traversed. Following the example of the C++ standard template library, the +techniques used to traverse these various data structures are all basically the +same. For an enumerable sequence of values, the ``XXXbegin()`` function (or +method) returns an iterator to the start of the sequence, the ``XXXend()`` +function returns an iterator pointing to one past the last valid element of the +sequence, and there is some ``XXXiterator`` data type that is common between the +two operations. + +Because the pattern for iteration is common across many different aspects of the +program representation, the standard template library algorithms may be used on +them, and it is easier to remember how to iterate. First we show a few common +examples of the data structures that need to be traversed. Other data +structures are traversed in very similar ways. + +.. _iterate_function: + +Iterating over the ``BasicBlock`` in a ``Function`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It's quite common to have a ``Function`` instance that you'd like to transform +in some way; in particular, you'd like to manipulate its ``BasicBlock``\ s. To +facilitate this, you'll need to iterate over all of the ``BasicBlock``\ s that +constitute the ``Function``. The following is an example that prints the name +of a ``BasicBlock`` and the number of ``Instruction``\ s it contains: + +..
code-block:: c++ + + // func is a pointer to a Function instance + for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i) + // Print out the name of the basic block if it has one, and then the + // number of instructions that it contains + errs() << "Basic block (name=" << i->getName() << ") has " + << i->size() << " instructions.\n"; + +Note that i can be used as if it were a pointer for the purposes of invoking +member functions of the ``Instruction`` class. This is because the indirection +operator is overloaded for the iterator classes. In the above code, the +expression ``i->size()`` is exactly equivalent to ``(*i).size()`` just like +you'd expect. + +.. _iterate_basicblock: + +Iterating over the ``Instruction`` in a ``BasicBlock`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Just like when dealing with ``BasicBlock``\ s in ``Function``\ s, it's easy to +iterate over the individual instructions that make up ``BasicBlock``\ s. Here's +a code snippet that prints out each instruction in a ``BasicBlock``: + +.. code-block:: c++ + + // blk is a pointer to a BasicBlock instance + for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i) + // The next statement works since operator<<(ostream&,...) + // is overloaded for Instruction& + errs() << *i << "\n"; + + +However, this isn't really the best way to print out the contents of a +``BasicBlock``! Since the ostream operators are overloaded for virtually +anything you'll care about, you could have just invoked the print routine on the +basic block itself: ``errs() << *blk << "\n";``. + +.. _iterate_insiter: + +Iterating over the ``Instruction`` in a ``Function`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you're finding that you commonly iterate over a ``Function``'s +``BasicBlock``\ s and then that ``BasicBlock``'s ``Instruction``\ s, +``InstIterator`` should be used instead. 
You'll need to include +``llvm/Support/InstIterator.h`` (`doxygen +`__) and then instantiate +``InstIterator``\ s explicitly in your code. Here's a small example that shows +how to dump all instructions in a function to the standard error stream: + +.. code-block:: c++ + + #include "llvm/Support/InstIterator.h" + + // F is a pointer to a Function instance + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + errs() << *I << "\n"; + +Easy, isn't it? You can also use ``InstIterator``\ s to fill a work list with +its initial contents. For example, if you wanted to initialize a work list to +contain all instructions in a ``Function`` F, all you would need to do is +something like: + +.. code-block:: c++ + + std::set worklist; + // or better yet, SmallPtrSet worklist; + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + worklist.insert(&*I); + +The STL set ``worklist`` would now contain all instructions in the ``Function`` +pointed to by F. + +.. _iterate_convert: + +Turning an iterator into a class pointer (and vice-versa) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometimes, it'll be useful to grab a reference (or pointer) to a class instance +when all you've got at hand is an iterator. Well, extracting a reference or a +pointer from an iterator is very straight-forward. Assuming that ``i`` is a +``BasicBlock::iterator`` and ``j`` is a ``BasicBlock::const_iterator``: + +.. code-block:: c++ + + Instruction& inst = *i; // Grab reference to instruction reference + Instruction* pinst = &*i; // Grab pointer to instruction reference + const Instruction& inst = *j; + +However, the iterators you'll be working with in the LLVM framework are special: +they will automatically convert to a ptr-to-instance type whenever they need to. 
+Instead of dereferencing the iterator and then taking the address of the result, +you can simply assign the iterator to the proper pointer type and you get the +dereference and address-of operation as a result of the assignment (behind the +scenes, this is a result of overloading casting mechanisms). Thus the last line +of the last example, + +.. code-block:: c++ + + Instruction *pinst = &*i; + +is semantically equivalent to + +.. code-block:: c++ + + Instruction *pinst = i; + +It's also possible to turn a class pointer into the corresponding iterator, and +this is a constant time operation (very efficient). The following code snippet +illustrates use of the conversion constructors provided by LLVM iterators. By +using these, you can explicitly grab the iterator of something without actually +obtaining it via iteration over some structure: + +.. code-block:: c++ + + void printNextInstruction(Instruction* inst) { + BasicBlock::iterator it(inst); + ++it; // After this line, it refers to the instruction after *inst + if (it != inst->getParent()->end()) errs() << *it << "\n"; + } + +Unfortunately, these implicit conversions come at a cost; they prevent these +iterators from conforming to standard iterator conventions, and thus from being +usable with standard algorithms and containers. For example, they prevent the +following code, where ``B`` is a ``BasicBlock``, from compiling: + +.. code-block:: c++ + + llvm::SmallVector(B->begin(), B->end()); + +Because of this, these implicit conversions may be removed some day, and +``operator*`` changed to return a pointer instead of a reference. + +.. _iterate_complex: + +Finding call sites: a slightly more complex example +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Say that you're writing a FunctionPass and would like to count all the locations +in the entire module (that is, across every ``Function``) where a certain +function (i.e., some ``Function *``) is already in scope.
As you'll learn +later, you may want to use an ``InstVisitor`` to accomplish this in a much more +straight-forward manner, but this example will allow us to explore how you'd do +it if you didn't have ``InstVisitor`` around. In pseudo-code, this is what we +want to do: + +.. code-block:: none + + initialize callCounter to zero + for each Function f in the Module + for each BasicBlock b in f + for each Instruction i in b + if (i is a CallInst and calls the given function) + increment callCounter + +And the actual code is (remember, because we're writing a ``FunctionPass``, our +``FunctionPass``-derived class simply has to override the ``runOnFunction`` +method): + +.. code-block:: c++ + + Function* targetFunc = ...; + + class OurFunctionPass : public FunctionPass { + public: + OurFunctionPass(): callCounter(0) { } + + virtual runOnFunction(Function& F) { + for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) { + for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) { + if (CallInst* callInst = dyn_cast(&*i)) { + // We know we've encountered a call instruction, so we + // need to determine if it's a call to the + // function pointed to by m_func or not. + if (callInst->getCalledFunction() == targetFunc) + ++callCounter; + } + } + } + } + + private: + unsigned callCounter; + }; + +.. _calls_and_invokes: + +Treating calls and invokes the same way +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You may have noticed that the previous example was a bit oversimplified in that +it did not deal with call sites generated by 'invoke' instructions. In this, +and in other situations, you may find that you want to treat ``CallInst``\ s and +``InvokeInst``\ s the same way, even though their most-specific common base +class is ``Instruction``, which includes lots of less closely-related things. 
+For these cases, LLVM provides a handy wrapper class called ``CallSite`` +(`doxygen `__). It is +essentially a wrapper around an ``Instruction`` pointer, with some methods that +provide functionality common to ``CallInst``\ s and ``InvokeInst``\ s. + +This class has "value semantics": it should be passed by value, not by reference +and it should not be dynamically allocated or deallocated using ``operator new`` +or ``operator delete``. It is efficiently copyable, assignable and +constructable, with a cost equivalent to that of a bare pointer. If you look at +its definition, it has only a single pointer member. + +.. _iterate_chains: + +Iterating over def-use & use-def chains +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Frequently, we might have an instance of the ``Value`` class (`doxygen +`__) and we want to determine +which ``User``\ s use the ``Value``. The list of all ``User``\ s of a particular +``Value`` is called a *def-use* chain. For example, let's say we have a +``Function*`` named ``F`` to a particular function ``foo``. Finding all of the +instructions that *use* ``foo`` is as simple as iterating over the *def-use* +chain of ``F``: + +.. code-block:: c++ + + Function *F = ...; + + for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i) + if (Instruction *Inst = dyn_cast(*i)) { + errs() << "F is used in instruction:\n"; + errs() << *Inst << "\n"; + } + +Note that dereferencing a ``Value::use_iterator`` is not a very cheap operation. +Instead of performing ``*i`` above several times, consider doing it only once in +the loop body and reusing its result. + +Alternatively, it's common to have an instance of the ``User`` Class (`doxygen +`__) and need to know what +``Value``\ s are used by it. The list of all ``Value``\ s used by a ``User`` is +known as a *use-def* chain.
Instances of class ``Instruction`` are common +``User`` s, so we might want to iterate over all of the values that a particular +instruction uses (that is, the operands of the particular ``Instruction``): + +.. code-block:: c++ + + Instruction *pi = ...; + + for (User::op_iterator i = pi->op_begin(), e = pi->op_end(); i != e; ++i) { + Value *v = *i; + // ... + } + +Declaring objects as ``const`` is an important tool of enforcing mutation free +algorithms (such as analyses, etc.). For this purpose above iterators come in +constant flavors as ``Value::const_use_iterator`` and +``Value::const_op_iterator``. They automatically arise when calling +``use/op_begin()`` on ``const Value*``\ s or ``const User*``\ s respectively. +Upon dereferencing, they return ``const Use*``\ s. Otherwise the above patterns +remain unchanged. + +.. _iterate_preds: + +Iterating over predecessors & successors of blocks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Iterating over the predecessors and successors of a block is quite easy with the +routines defined in ``"llvm/Support/CFG.h"``. Just use code like this to +iterate over all predecessors of BB: + +.. code-block:: c++ + + #include "llvm/Support/CFG.h" + BasicBlock *BB = ...; + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *Pred = *PI; + // ... + } + +Similarly, to iterate over successors use ``succ_iterator/succ_begin/succ_end``. + +.. _simplechanges: + +Making simple changes +--------------------- + +There are some primitive transformation operations present in the LLVM +infrastructure that are worth knowing about. When performing transformations, +it's fairly common to manipulate the contents of basic blocks. This section +describes some of the common methods for doing so and gives example code. + +.. 
_schanges_creating: + +Creating and inserting new ``Instruction``\ s +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +*Instantiating Instructions* + +Creation of ``Instruction``\ s is straight-forward: simply call the constructor +for the kind of instruction to instantiate and provide the necessary parameters. +For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``. Thus: + +.. code-block:: c++ + + AllocaInst* ai = new AllocaInst(Type::Int32Ty); + +will create an ``AllocaInst`` instance that represents the allocation of one +integer in the current stack frame, at run time. Each ``Instruction`` subclass +is likely to have varying default parameters which change the semantics of the +instruction, so refer to the `doxygen documentation for the subclass of +Instruction `_ that +you're interested in instantiating. + +*Naming values* + +It is very useful to name the values of instructions when you're able to, as +this facilitates the debugging of your transformations. If you end up looking +at generated LLVM machine code, you definitely want to have logical names +associated with the results of instructions! By supplying a value for the +``Name`` (default) parameter of the ``Instruction`` constructor, you associate a +logical name with the result of the instruction's execution at run time. For +example, say that I'm writing a transformation that dynamically allocates space +for an integer on the stack, and that integer is going to be used as some kind +of index by some other code. To accomplish this, I place an ``AllocaInst`` at +the first point in the first ``BasicBlock`` of some ``Function``, and I'm +intending to use it within the same ``Function``. I might do: + +.. code-block:: c++ + + AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc"); + +where ``indexLoc`` is now the logical name of the instruction's execution value, +which is a pointer to an integer on the run time stack. 
+ +*Inserting instructions* + +There are essentially two ways to insert an ``Instruction`` into an existing +sequence of instructions that form a ``BasicBlock``: + +* Insertion into an explicit instruction list + + Given a ``BasicBlock* pb``, an ``Instruction* pi`` within that ``BasicBlock``, + and a newly-created instruction we wish to insert before ``*pi``, we do the + following: + + .. code-block:: c++ + + BasicBlock *pb = ...; + Instruction *pi = ...; + Instruction *newInst = new Instruction(...); + + pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb + + Appending to the end of a ``BasicBlock`` is so common that the ``Instruction`` + class and ``Instruction``-derived classes provide constructors which take a + pointer to a ``BasicBlock`` to be appended to. For example code that looked + like: + + .. code-block:: c++ + + BasicBlock *pb = ...; + Instruction *newInst = new Instruction(...); + + pb->getInstList().push_back(newInst); // Appends newInst to pb + + becomes: + + .. code-block:: c++ + + BasicBlock *pb = ...; + Instruction *newInst = new Instruction(..., pb); + + which is much cleaner, especially if you are creating long instruction + streams. + +* Insertion into an implicit instruction list + + ``Instruction`` instances that are already in ``BasicBlock``\ s are implicitly + associated with an existing instruction list: the instruction list of the + enclosing basic block. Thus, we could have accomplished the same thing as the + above code without being given a ``BasicBlock`` by doing: + + .. code-block:: c++ + + Instruction *pi = ...; + Instruction *newInst = new Instruction(...); + + pi->getParent()->getInstList().insert(pi, newInst); + + In fact, this sequence of steps occurs so frequently that the ``Instruction`` + class and ``Instruction``-derived classes provide constructors which take (as + a default parameter) a pointer to an ``Instruction`` which the newly-created + ``Instruction`` should precede. 
That is, ``Instruction`` constructors are + capable of inserting the newly-created instance into the ``BasicBlock`` of a + provided instruction, immediately before that instruction. Using an + ``Instruction`` constructor with a ``insertBefore`` (default) parameter, the + above code becomes: + + .. code-block:: c++ + + Instruction* pi = ...; + Instruction* newInst = new Instruction(..., pi); + + which is much cleaner, especially if you're creating a lot of instructions and + adding them to ``BasicBlock``\ s. + +.. _schanges_deleting: + +Deleting Instructions +^^^^^^^^^^^^^^^^^^^^^ + +Deleting an instruction from an existing sequence of instructions that form a +BasicBlock_ is very straight-forward: just call the instruction's +``eraseFromParent()`` method. For example: + +.. code-block:: c++ + + Instruction *I = .. ; + I->eraseFromParent(); + +This unlinks the instruction from its containing basic block and deletes it. If +you'd just like to unlink the instruction from its containing basic block but +not delete it, you can use the ``removeFromParent()`` method. + +.. _schanges_replacing: + +Replacing an Instruction with another Value +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Replacing individual instructions +""""""""""""""""""""""""""""""""" + +Including "`llvm/Transforms/Utils/BasicBlockUtils.h +`_" permits use of two +very useful replace functions: ``ReplaceInstWithValue`` and +``ReplaceInstWithInst``. + +.. _schanges_deleting_sub: + +Deleting Instructions +""""""""""""""""""""" + +* ``ReplaceInstWithValue`` + + This function replaces all uses of a given instruction with a value, and then + removes the original instruction. The following example illustrates the + replacement of the result of a particular ``AllocaInst`` that allocates memory + for a single integer with a null pointer to an integer. + + .. 
code-block:: c++ + + AllocaInst* instToReplace = ...; + BasicBlock::iterator ii(instToReplace); + + ReplaceInstWithValue(instToReplace->getParent()->getInstList(), ii, + Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty))); + +* ``ReplaceInstWithInst`` + + This function replaces a particular instruction with another instruction, + inserting the new instruction into the basic block at the location where the + old instruction was, and replacing any uses of the old instruction with the + new instruction. The following example illustrates the replacement of one + ``AllocaInst`` with another. + + .. code-block:: c++ + + AllocaInst* instToReplace = ...; + BasicBlock::iterator ii(instToReplace); + + ReplaceInstWithInst(instToReplace->getParent()->getInstList(), ii, + new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt")); + + +Replacing multiple uses of Users and Values +""""""""""""""""""""""""""""""""""""""""""" + +You can use ``Value::replaceAllUsesWith`` and ``User::replaceUsesOfWith`` to +change more than one use at a time. See the doxygen documentation for the +`Value Class `_ and `User Class +`_, respectively, for more +information. + +.. _schanges_deletingGV: + +Deleting GlobalVariables +^^^^^^^^^^^^^^^^^^^^^^^^ + +Deleting a global variable from a module is just as easy as deleting an +Instruction. First, you must have a pointer to the global variable that you +wish to delete. You use this pointer to erase it from its parent, the module. +For example: + +.. code-block:: c++ + + GlobalVariable *GV = .. ; + + GV->eraseFromParent(); + + +.. _create_types: + +How to Create Types +------------------- + +In generating IR, you may need some complex types. If you know these types +statically, you can use ``TypeBuilder<...>::get()``, defined in +``llvm/Support/TypeBuilder.h``, to retrieve them. ``TypeBuilder`` has two forms +depending on whether you're building types for cross-compilation or native +library use. 
``TypeBuilder<T, true>`` requires that ``T`` be independent of the +host environment, meaning that it's built out of types from the ``llvm::types`` +(`doxygen `__) namespace +and pointers, functions, arrays, etc. built of those. ``TypeBuilder<T, false>`` +additionally allows native C types whose size may depend on the host compiler. +For example, + +.. code-block:: c++ + + FunctionType *ft = TypeBuilder<types::i<8>(types::i<32>*), true>::get(); + +is easier to read and write than the equivalent + +.. code-block:: c++ + + std::vector<const Type*> params; + params.push_back(PointerType::getUnqual(Type::Int32Ty)); + FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false); + +See the `class comment +`_ for more details. + +.. _threading: + +Threads and LLVM +================ + +This section describes the interaction of the LLVM APIs with multithreading, +both on the part of client applications, and in the JIT, in the hosted +application. + +Note that LLVM's support for multithreading is still relatively young. Up +through version 2.5, the execution of threaded hosted applications was +supported, but not threaded client access to the APIs. While this use case is +now supported, clients *must* adhere to the guidelines specified below to ensure +proper operation in multithreaded mode. + +Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic +intrinsics in order to support threaded operation. If you need a +multithreading-capable LLVM on a platform without a suitably modern system +compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and +using the resultant compiler to build a copy of LLVM with multithreading +support. + +.. _startmultithreaded: + +Entering and Exiting Multithreaded Mode +--------------------------------------- + +In order to properly protect its internal data structures while avoiding +excessive locking overhead in the single-threaded case, LLVM must initialize +certain data structures necessary to provide guards around its internals.
To do +so, the client program must invoke ``llvm_start_multithreaded()`` before making +any concurrent LLVM API calls. To subsequently tear down these structures, use +the ``llvm_stop_multithreaded()`` call. You can also use the +``llvm_is_multithreaded()`` call to check the status of multithreaded mode. + +Note that both of these calls must be made *in isolation*. That is to say that +no other LLVM API calls may be executing at any time during the execution of +``llvm_start_multithreaded()`` or ``llvm_stop_multithreaded()``. It is the +client's responsibility to enforce this isolation. + +The return value of ``llvm_start_multithreaded()`` indicates the success or +failure of the initialization. Failure typically indicates that your copy of +LLVM was built without multithreading support, typically because GCC atomic +intrinsics were not found in your system compiler. In this case, the LLVM API +will not be safe for concurrent calls. However, it *will* be safe for hosting +threaded applications in the JIT, though :ref:`care must be taken +<jitthreading>` to ensure that side exits and the like do not accidentally +result in concurrent LLVM API calls. + +.. _shutdown: + +Ending Execution with ``llvm_shutdown()`` +----------------------------------------- + +When you are done using the LLVM APIs, you should call ``llvm_shutdown()`` to +deallocate memory used for internal structures. This will also invoke +``llvm_stop_multithreaded()`` if LLVM is operating in multithreaded mode. As +such, ``llvm_shutdown()`` requires the same isolation guarantees as +``llvm_stop_multithreaded()``. + +Note that, if you use scope-based shutdown, you can use the +``llvm_shutdown_obj`` class, which calls ``llvm_shutdown()`` in its destructor. + +.. _managedstatic: + +Lazy Initialization with ``ManagedStatic`` +------------------------------------------ + +``ManagedStatic`` is a utility class in LLVM used to implement static +initialization of static resources, such as the global type tables.
Before the +invocation of ``llvm_shutdown()``, it implements a simple lazy initialization +scheme. Once ``llvm_start_multithreaded()`` returns, however, it uses +double-checked locking to implement thread-safe lazy initialization. + +Note that, because no other threads are allowed to issue LLVM API calls before +``llvm_start_multithreaded()`` returns, it is possible to have +``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s. + +The ``llvm_acquire_global_lock()`` and ``llvm_release_global_lock()`` APIs provide +access to the global lock used to implement the double-checked locking for lazy +initialization. These should only be used internally to LLVM, and only if you +know what you're doing! + +.. _llvmcontext: + +Achieving Isolation with ``LLVMContext`` +---------------------------------------- + +``LLVMContext`` is an opaque class in the LLVM API which clients can use to +operate multiple, isolated instances of LLVM concurrently within the same +address space. For instance, in a hypothetical compile-server, the compilation +of an individual translation unit is conceptually independent from all the +others, and it would be desirable to be able to compile incoming translation +units concurrently on independent server threads. Fortunately, ``LLVMContext`` +exists to enable just this kind of scenario! + +Conceptually, ``LLVMContext`` provides isolation. Every LLVM entity +(``Module``\ s, ``Value``\ s, ``Type``\ s, ``Constant``\ s, etc.) in LLVM's +in-memory IR belongs to an ``LLVMContext``. Entities in different contexts +*cannot* interact with each other: ``Module``\ s in different contexts cannot be +linked together, ``Function``\ s cannot be added to ``Module``\ s in different +contexts, etc. What this means is that it is safe to compile on multiple +threads simultaneously, as long as no two threads operate on entities within the +same context.
+ +In practice, very few places in the API require the explicit specification of a +``LLVMContext``, other than the ``Type`` creation/lookup APIs. Because every +``Type`` carries a reference to its owning context, most other entities can +determine what context they belong to by looking at their own ``Type``. If you +are adding new entities to LLVM IR, please try to maintain this interface +design. + +For clients that do *not* require the benefits of isolation, LLVM provides a +convenience API ``getGlobalContext()``. This returns a global, lazily +initialized ``LLVMContext`` that may be used in situations where isolation is +not a concern. + +.. _jitthreading: + +Threads and the JIT +------------------- + +LLVM's "eager" JIT compiler is safe to use in threaded programs. Multiple +threads can call ``ExecutionEngine::getPointerToFunction()`` or +``ExecutionEngine::runFunction()`` concurrently, and multiple threads can run +code output by the JIT concurrently. The user must still ensure that only one +thread accesses IR in a given ``LLVMContext`` while another thread might be +modifying it. One way to do that is to always hold the JIT lock while accessing +IR outside the JIT (the JIT *modifies* the IR by adding ``CallbackVH``\ s). +Another way is to only call ``getPointerToFunction()`` from the +``LLVMContext``'s thread. + +When the JIT is configured to compile lazily (using +``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race +condition `_ in updating call sites +after a function is lazily-jitted. It's still possible to use the lazy JIT in a +threaded program if you ensure that only one thread at a time can call any +particular lazy stub and that the JIT lock guards any IR access, but we suggest +using only the eager JIT in threaded programs. + +.. _advanced: + +Advanced Topics +=============== + +This section describes some of the advanced or obscure API's that most clients +do not need to be aware of. 
These API's tend to manage the inner workings of the +LLVM system, and only need to be accessed in unusual circumstances. + +.. _SymbolTable: + +The ``ValueSymbolTable`` class +------------------------------ + +The ``ValueSymbolTable`` (`doxygen +`__) class provides +a symbol table that the :ref:`Function ` and Module_ classes use for +naming value definitions. The symbol table can provide a name for any Value_. + +Note that the ``SymbolTable`` class should not be directly accessed by most +clients. It should only be used when iteration over the symbol table names +themselves is required, which is very special purpose. Note that not all LLVM +Value_\ s have names, and those without names (i.e. they have an empty name) do +not exist in the symbol table. + +Symbol tables support iteration over the values in the symbol table with +``begin/end/iterator`` and supports querying to see if a specific name is in the +symbol table (with ``lookup``). The ``ValueSymbolTable`` class exposes no +public mutator methods, instead, simply call ``setName`` on a value, which will +autoinsert it into the appropriate symbol table. + +.. _UserLayout: + +The ``User`` and owned ``Use`` classes' memory layout +----------------------------------------------------- + +The ``User`` (`doxygen `__) +class provides a basis for expressing the ownership of ``User`` towards other +`Value instance `_\ s. The +``Use`` (`doxygen `__) helper +class is employed to do the bookkeeping and to facilitate *O(1)* addition and +removal. + +.. _Use2User: + +Interaction and relationship between ``User`` and ``Use`` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A subclass of ``User`` can choose between incorporating its ``Use`` objects or +refer to them out-of-line by means of a pointer. A mixed variant (some ``Use`` +s inline others hung off) is impractical and breaks the invariant that the +``Use`` objects belonging to the same ``User`` form a contiguous array.
+ +We have 2 different layouts in the ``User`` (sub)classes: + +* Layout a) + + The ``Use`` object(s) are inside (resp. at fixed offset) of the ``User`` + object and there are a fixed number of them. + +* Layout b) + + The ``Use`` object(s) are referenced by a pointer to an array from the + ``User`` object and there may be a variable number of them. + +As of v2.4 each layout still possesses a direct pointer to the start of the +array of ``Use``\ s. Though not mandatory for layout a), we stick to this +redundancy for the sake of simplicity. The ``User`` object also stores the +number of ``Use`` objects it has. (Theoretically this information can also be +calculated given the scheme presented below.) + +Special forms of allocation operators (``operator new``) enforce the following +memory layouts: + +* Layout a) is modelled by prepending the ``User`` object by the ``Use[]`` + array. + + .. code-block:: none + + ...---.---.---.---.-------... + | P | P | P | P | User + '''---'---'---'---'-------''' + +* Layout b) is modelled by pointing at the ``Use[]`` array. + + .. code-block:: none + + .-------... + | User + '-------''' + | + v + .---.---.---.---... + | P | P | P | P | + '---'---'---'---''' + +*(In the above figures* '``P``' *stands for the* ``Use**`` *that is stored in +each* ``Use`` *object in the member* ``Use::Prev`` *)* + +.. _Waymarking: + +The waymarking algorithm +^^^^^^^^^^^^^^^^^^^^^^^^ + +Since the ``Use`` objects are deprived of the direct (back)pointer to their +``User`` objects, there must be a fast and exact method to recover it. 
This is +accomplished by the following scheme: + +A bit-encoding in the 2 LSBits (least significant bits) of the ``Use::Prev`` +allows to find the start of the ``User`` object: + +* ``00`` --- binary digit 0 + +* ``01`` --- binary digit 1 + +* ``10`` --- stop and calculate (``s``) + +* ``11`` --- full stop (``S``) + +Given a ``Use*``, all we have to do is to walk till we get a stop and we either +have a ``User`` immediately behind or we have to walk to the next stop picking +up digits and calculating the offset: + +.. code-block:: none + + .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---------------- + | 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*) + '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---------------- + |+15 |+10 |+6 |+3 |+1 + | | | | | __> + | | | | __________> + | | | ______________________> + | | ______________________________________> + | __________________________________________________________> + +Only the significant number of bits need to be stored between the stops, so that +the *worst case is 20 memory accesses* when there are 1000 ``Use`` objects +associated with a ``User``. + +.. _ReferenceImpl: + +Reference implementation +^^^^^^^^^^^^^^^^^^^^^^^^ + +The following literate Haskell fragment demonstrates the concept: + +.. 
code-block:: haskell + + > import Test.QuickCheck + > + > digits :: Int -> [Char] -> [Char] + > digits 0 acc = '0' : acc + > digits 1 acc = '1' : acc + > digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc + > + > dist :: Int -> [Char] -> [Char] + > dist 0 [] = ['S'] + > dist 0 acc = acc + > dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r + > dist n acc = dist (n - 1) $ dist 1 acc + > + > takeLast n ss = reverse $ take n $ reverse ss + > + > test = takeLast 40 $ dist 20 [] + > + +Printing gives: ``"1s100000s11010s10100s1111s1010s110s11s1S"`` + +The reverse algorithm computes the length of the string just by examining a +certain prefix: + +.. code-block:: haskell + + > pref :: [Char] -> Int + > pref "S" = 1 + > pref ('s':'1':rest) = decode 2 1 rest + > pref (_:rest) = 1 + pref rest + > + > decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest + > decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest + > decode walk acc _ = walk + acc + > + +Now, as expected, printing gives ``40``. + +We can *quickCheck* this with following property: + +.. code-block:: haskell + + > testcase = dist 2000 [] + > testcaseLength = length testcase + > + > identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr + > where arr = takeLast n testcase + > + +As expected gives: + +:: + + *Main> quickCheck identityProp + OK, passed 100 tests. + +Let's be a bit more exhaustive: + +.. code-block:: haskell + + > + > deepCheck p = check (defaultConfig { configMaxTest = 500 }) p + > + +And here is the result of : + +:: + + *Main> deepCheck identityProp + OK, passed 500 tests. + +.. _Tagging: + +Tagging considerations +^^^^^^^^^^^^^^^^^^^^^^ + +To maintain the invariant that the 2 LSBits of each ``Use**`` in ``Use`` never +change after being set up, setters of ``Use::Prev`` must re-tag the new +``Use**`` on every modification. Accordingly getters must strip the tag bits. 
+ +For layout b) instead of the ``User`` we find a pointer (``User*`` with LSBit +set). Following this pointer brings us to the ``User``. A portable trick +ensures that the first bytes of ``User`` (if interpreted as a pointer) never have +the LSBit set. (Portability is relying on the fact that all known compilers +place the ``vptr`` in the first word of the instances.) + +.. _coreclasses: + +The Core LLVM Class Hierarchy Reference +======================================= + +``#include "llvm/Type.h"`` + +header source: `Type.h `_ + +doxygen info: `Type Classes `_ + +The Core LLVM classes are the primary means of representing the program being +inspected or transformed. The core LLVM classes are defined in header files in +the ``include/llvm/`` directory, and implemented in the ``lib/VMCore`` +directory. + +.. _Type: + +The Type class and Derived Types +-------------------------------- + +``Type`` is a superclass of all type classes. Every ``Value`` has a ``Type``. +``Type`` cannot be instantiated directly but only through its subclasses. +Certain primitive types (``VoidType``, ``LabelType``, ``FloatType`` and +``DoubleType``) have hidden subclasses. They are hidden because they offer no +useful functionality beyond what the ``Type`` class offers except to distinguish +themselves from other subclasses of ``Type``. + +All other types are subclasses of ``DerivedType``. Types can be named, but this +is not a requirement. There exists exactly one instance of a given shape at any +one time. This allows type equality to be performed with address equality of +the Type Instance. That is, given two ``Type*`` values, the types are identical +if the pointers are identical. + +.. _m_Type: + +Important Public Methods +^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``bool isIntegerTy() const``: Returns true for any integer type. + +* ``bool isFloatingPointTy()``: Return true if this is one of the five + floating point types. + +* ``bool isSized()``: Return true if the type has known size.
Things + that don't have a size are abstract types, labels and void. + +.. _derivedtypes: + +Important Derived Types +^^^^^^^^^^^^^^^^^^^^^^^ + +``IntegerType`` + Subclass of DerivedType that represents integer types of any bit width. Any + bit width between ``IntegerType::MIN_INT_BITS`` (1) and + ``IntegerType::MAX_INT_BITS`` (~8 million) can be represented. + + * ``static const IntegerType* get(unsigned NumBits)``: get an integer + type of a specific bit width. + + * ``unsigned getBitWidth() const``: Get the bit width of an integer type. + +``SequentialType`` + This is subclassed by ArrayType, PointerType and VectorType. + + * ``const Type * getElementType() const``: Returns the type of each + of the elements in the sequential type. + +``ArrayType`` + This is a subclass of SequentialType and defines the interface for array + types. + + * ``unsigned getNumElements() const``: Returns the number of elements + in the array. + +``PointerType`` + Subclass of SequentialType for pointer types. + +``VectorType`` + Subclass of SequentialType for vector types. A vector type is similar to an + ArrayType but is distinguished because it is a first class type whereas + ArrayType is not. Vector types are used for vector operations and are usually + small vectors of an integer or floating point type. + +``StructType`` + Subclass of DerivedTypes for struct types. + +.. _FunctionType: + +``FunctionType`` + Subclass of DerivedTypes for function types. + + * ``bool isVarArg() const``: Returns true if it's a vararg function. + + * ``const Type * getReturnType() const``: Returns the return type of the + function. + + * ``const Type * getParamType (unsigned i)``: Returns the type of the ith + parameter. + + * ``const unsigned getNumParams() const``: Returns the number of formal + parameters. + +..
_Module: + +The ``Module`` class +-------------------- + +``#include "llvm/Module.h"`` + +header source: `Module.h `_ + +doxygen info: `Module Class `_ + +The ``Module`` class represents the top level structure present in LLVM +programs. An LLVM module is effectively either a translation unit of the +original program or a combination of several translation units merged by the +linker. The ``Module`` class keeps track of a list of :ref:`Function +`\ s, a list of GlobalVariable_\ s, and a SymbolTable_. +Additionally, it contains a few helpful member functions that try to make common +operations easy. + +.. _m_Module: + +Important Public Members of the ``Module`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``Module::Module(std::string name = "")`` + + Constructing a Module_ is easy. You can optionally provide a name for it + (probably based on the name of the translation unit). + +* | ``Module::iterator`` - Typedef for function list iterator + | ``Module::const_iterator`` - Typedef for const_iterator. + | ``begin()``, ``end()``, ``size()``, ``empty()`` + + These are forwarding methods that make it easy to access the contents of a + ``Module`` object's :ref:`Function ` list. + +* ``Module::FunctionListType &getFunctionList()`` + + Returns the list of :ref:`Function `\ s. This is necessary to use + when you need to update the list or perform a complex action that doesn't have + a forwarding method. + +---------------- + +* | ``Module::global_iterator`` - Typedef for global variable list iterator + | ``Module::const_global_iterator`` - Typedef for const_iterator. + | ``global_begin()``, ``global_end()``, ``global_size()``, ``global_empty()`` + + These are forwarding methods that make it easy to access the contents of a + ``Module`` object's GlobalVariable_ list. + +* ``Module::GlobalListType &getGlobalList()`` + + Returns the list of GlobalVariable_\ s. 
This is necessary to use when you + need to update the list or perform a complex action that doesn't have a + forwarding method. + +---------------- + +* ``SymbolTable *getSymbolTable()`` + + Return a reference to the SymbolTable_ for this ``Module``. + +---------------- + +* ``Function *getFunction(StringRef Name) const`` + + Look up the specified function in the ``Module`` SymbolTable_. If it does not + exist, return ``null``. + +* ``Function *getOrInsertFunction(const std::string &Name, const FunctionType + *T)`` + + Look up the specified function in the ``Module`` SymbolTable_. If it does not + exist, add an external declaration for the function and return it. + +* ``std::string getTypeName(const Type *Ty)`` + + If there is at least one entry in the SymbolTable_ for the specified Type_, + return it. Otherwise return the empty string. + +* ``bool addTypeName(const std::string &Name, const Type *Ty)`` + + Insert an entry in the SymbolTable_ mapping ``Name`` to ``Ty``. If there is + already an entry for this name, true is returned and the SymbolTable_ is not + modified. + +.. _Value: + +The ``Value`` class +------------------- + +``#include "llvm/Value.h"`` + +header source: `Value.h `_ + +doxygen info: `Value Class `_ + +The ``Value`` class is the most important class in the LLVM Source base. It +represents a typed value that may be used (among other things) as an operand to +an instruction. There are many different types of ``Value``\ s, such as +Constant_\ s, Argument_\ s. Even Instruction_\ s and :ref:`Function +`\ s are ``Value``\ s. + +A particular ``Value`` may be used many times in the LLVM representation for a +program. For example, an incoming argument to a function (represented with an +instance of the Argument_ class) is "used" by every instruction in the function +that references the argument. 
To keep track of this relationship, the ``Value`` +class keeps a list of all of the ``User``\ s that are using it (the User_ class +is a base class for all nodes in the LLVM graph that can refer to ``Value``\ s). +This use list is how LLVM represents def-use information in the program, and is +accessible through the ``use_*`` methods, shown below. + +Because LLVM is a typed representation, every LLVM ``Value`` is typed, and this +Type_ is available through the ``getType()`` method. In addition, all LLVM +values can be named. The "name" of the ``Value`` is a symbolic string printed +in the LLVM code: + +.. code-block:: llvm + + %foo = add i32 1, 2 + +.. _nameWarning: + +The name of this instruction is "foo". **NOTE** that the name of any value may +be missing (an empty string), so names should **ONLY** be used for debugging +(making the source code easier to read, debugging printouts), they should not be +used to keep track of values or map between them. For this purpose, use a +``std::map`` of pointers to the ``Value`` itself instead. + +One important aspect of LLVM is that there is no distinction between an SSA +variable and the operation that produces it. Because of this, any reference to +the value produced by an instruction (or the value available as an incoming +argument, for example) is represented as a direct pointer to the instance of the +class that represents this value. Although this may take some getting used to, +it simplifies the representation and makes it easier to manipulate. + +.. _m_Value: + +Important Public Members of the ``Value`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* | ``Value::use_iterator`` - Typedef for iterator over the use-list + | ``Value::const_use_iterator`` - Typedef for const_iterator over the + use-list + | ``unsigned use_size()`` - Returns the number of users of the value. + | ``bool use_empty()`` - Returns true if there are no users.
+ | ``use_iterator use_begin()`` - Get an iterator to the start of the + use-list. + | ``use_iterator use_end()`` - Get an iterator to the end of the use-list. + | ``User *use_back()`` - Returns the last element in the list. + + These methods are the interface to access the def-use information in LLVM. + As with all other iterators in LLVM, the naming conventions follow the + conventions defined by the STL_. + +* ``Type *getType() const`` + This method returns the Type of the Value. + +* | ``bool hasName() const`` + | ``std::string getName() const`` + | ``void setName(const std::string &Name)`` + + This family of methods is used to access and assign a name to a ``Value``, be + aware of the :ref:`precaution above `. + +* ``void replaceAllUsesWith(Value *V)`` + + This method traverses the use list of a ``Value`` changing all User_\ s of the + current value to refer to "``V``" instead. For example, if you detect that an + instruction always produces a constant value (for example through constant + folding), you can replace all uses of the instruction with the constant like + this: + + .. code-block:: c++ + + Inst->replaceAllUsesWith(ConstVal); + +.. _User: + +The ``User`` class +------------------ + +``#include "llvm/User.h"`` + +header source: `User.h `_ + +doxygen info: `User Class `_ + +Superclass: Value_ + +The ``User`` class is the common base class of all LLVM nodes that may refer to +``Value``\ s. It exposes a list of "Operands" that are all of the ``Value``\ s +that the User is referring to. The ``User`` class itself is a subclass of +``Value``. + +The operands of a ``User`` point directly to the LLVM ``Value`` that it refers +to. Because LLVM uses Static Single Assignment (SSA) form, there can only be +one definition referred to, allowing this direct connection. This connection +provides the use-def information in LLVM. + +.. 
_m_User: + +Important Public Members of the ``User`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``User`` class exposes the operand list in two ways: through an index access +interface and through an iterator based interface. + +* | ``Value *getOperand(unsigned i)`` + | ``unsigned getNumOperands()`` + + These two methods expose the operands of the ``User`` in a convenient form for + direct access. + +* | ``User::op_iterator`` - Typedef for iterator over the operand list + | ``op_iterator op_begin()`` - Get an iterator to the start of the operand + list. + | ``op_iterator op_end()`` - Get an iterator to the end of the operand list. + + Together, these methods make up the iterator based interface to the operands + of a ``User``. + + +.. _Instruction: + +The ``Instruction`` class +------------------------- + +``#include "llvm/Instruction.h"`` + +header source: `Instruction.h +`_ + +doxygen info: `Instruction Class +`_ + +Superclasses: User_, Value_ + +The ``Instruction`` class is the common base class for all LLVM instructions. +It provides only a few methods, but is a very commonly used class. The primary +data tracked by the ``Instruction`` class itself is the opcode (instruction +type) and the parent BasicBlock_ the ``Instruction`` is embedded into. To +represent a specific type of instruction, one of many subclasses of +``Instruction`` are used. + +Because the ``Instruction`` class subclasses the User_ class, its operands can +be accessed in the same way as for other ``User``\ s (with the +``getOperand()``/``getNumOperands()`` and ``op_begin()``/``op_end()`` methods). +An important file for the ``Instruction`` class is the ``llvm/Instruction.def`` +file. This file contains some meta-data about the various different types of +instructions in LLVM. 
It describes the enum values that are used as opcodes +(for example ``Instruction::Add`` and ``Instruction::ICmp``), as well as the +concrete sub-classes of ``Instruction`` that implement the instruction (for +example BinaryOperator_ and CmpInst_). Unfortunately, the use of macros in this +file confuses doxygen, so these enum values don't show up correctly in the +`doxygen output `_. + +.. _s_Instruction: + +Important Subclasses of the ``Instruction`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _BinaryOperator: + +* ``BinaryOperator`` + + This subclass represents all two operand instructions whose operands must be + the same type, except for the comparison instructions. + +.. _CastInst: + +* ``CastInst`` + This subclass is the parent of the 12 casting instructions. It provides + common operations on cast instructions. + +.. _CmpInst: + +* ``CmpInst`` + + This subclass represents the two comparison instructions, + `ICmpInst `_ (integer operands), and + `FCmpInst `_ (floating point operands). + +.. _TerminatorInst: + +* ``TerminatorInst`` + + This subclass is the parent of all terminator instructions (those which can + terminate a block). + +.. _m_Instruction: + +Important Public Members of the ``Instruction`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``BasicBlock *getParent()`` + + Returns the BasicBlock_ that this + ``Instruction`` is embedded into. + +* ``bool mayWriteToMemory()`` + + Returns true if the instruction writes to memory, i.e. it is a ``call``, + ``free``, ``invoke``, or ``store``. + +* ``unsigned getOpcode()`` + + Returns the opcode for the ``Instruction``. + +* ``Instruction *clone() const`` + + Returns another instance of the specified instruction, identical in all ways + to the original except that the instruction has no parent (i.e. it's not + embedded into a BasicBlock_), and it has no name. + +.. 
_Constant: + +The ``Constant`` class and subclasses +------------------------------------- + +Constant represents a base class for different types of constants. It is +subclassed by ConstantInt, ConstantArray, etc. for representing the various +types of Constants. GlobalValue_ is also a subclass, which represents the +address of a global variable or function. + +.. _s_Constant: + +Important Subclasses of Constant +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ConstantInt : This subclass of Constant represents an integer constant of + any width. + + * ``const APInt& getValue() const``: Returns the underlying + value of this constant, an APInt value. + + * ``int64_t getSExtValue() const``: Converts the underlying APInt value to an + int64_t via sign extension. If the value (not the bit width) of the APInt + is too large to fit in an int64_t, an assertion will result. For this + reason, use of this method is discouraged. + + * ``uint64_t getZExtValue() const``: Converts the underlying APInt value + to a uint64_t via zero extension. If the value (not the bit width) of the + APInt is too large to fit in a uint64_t, an assertion will result. For this + reason, use of this method is discouraged. + + * ``static ConstantInt* get(const APInt& Val)``: Returns the ConstantInt + object that represents the value provided by ``Val``. The type is implied + as the IntegerType that corresponds to the bit width of ``Val``. + + * ``static ConstantInt* get(const Type *Ty, uint64_t Val)``: Returns the + ConstantInt object that represents the value provided by ``Val`` for integer + type ``Ty``. + +* ConstantFP : This class represents a floating point constant. + + * ``double getValue() const``: Returns the underlying value of this constant. + +* ConstantArray : This represents a constant array. + + * ``const std::vector &getValues() const``: Returns a vector of + component constants that make up this array. + +* ConstantStruct : This represents a constant struct. 
+ + * ``const std::vector &getValues() const``: Returns a vector of + component constants that makeup this array. + +* GlobalValue : This represents either a global variable or a function. In + either case, the value is a constant fixed address (after linking). + +.. _GlobalValue: + +The ``GlobalValue`` class +------------------------- + +``#include "llvm/GlobalValue.h"`` + +header source: `GlobalValue.h +`_ + +doxygen info: `GlobalValue Class +`_ + +Superclasses: Constant_, User_, Value_ + +Global values ( GlobalVariable_\ s or :ref:`Function `\ s) are the +only LLVM values that are visible in the bodies of all :ref:`Function +`\ s. Because they are visible at global scope, they are also +subject to linking with other globals defined in different translation units. +To control the linking process, ``GlobalValue``\ s know their linkage rules. +Specifically, ``GlobalValue``\ s know whether they have internal or external +linkage, as defined by the ``LinkageTypes`` enumeration. + +If a ``GlobalValue`` has internal linkage (equivalent to being ``static`` in C), +it is not visible to code outside the current translation unit, and does not +participate in linking. If it has external linkage, it is visible to external +code, and does participate in linking. In addition to linkage information, +``GlobalValue``\ s keep track of which Module_ they are currently part of. + +Because ``GlobalValue``\ s are memory objects, they are always referred to by +their **address**. As such, the Type_ of a global is always a pointer to its +contents. It is important to remember this when using the ``GetElementPtrInst`` +instruction because this pointer must be dereferenced first. For example, if +you have a ``GlobalVariable`` (a subclass of ``GlobalValue)`` that is an array +of 24 ints, type ``[24 x i32]``, then the ``GlobalVariable`` is a pointer to +that array. 
Although the address of the first element of this array and the +value of the ``GlobalVariable`` are the same, they have different types. The +``GlobalVariable``'s type is ``[24 x i32]``. The first element's type is +``i32``. Because of this, accessing a global value requires you to dereference +the pointer with ``GetElementPtrInst`` first, then its elements can be accessed. +This is explained in the `LLVM Language Reference Manual +`_. + +.. _m_GlobalValue: + +Important Public Members of the ``GlobalValue`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* | ``bool hasInternalLinkage() const`` + | ``bool hasExternalLinkage() const`` + | ``void setInternalLinkage(bool HasInternalLinkage)`` + + These methods manipulate the linkage characteristics of the ``GlobalValue``. + +* ``Module *getParent()`` + + This returns the Module_ that the + GlobalValue is currently embedded into. + +.. _c_Function: + +The ``Function`` class +---------------------- + +``#include "llvm/Function.h"`` + +header source: `Function.h `_ + +doxygen info: `Function Class +`_ + +Superclasses: GlobalValue_, Constant_, User_, Value_ + +The ``Function`` class represents a single procedure in LLVM. It is actually +one of the more complex classes in the LLVM hierarchy because it must keep track +of a large amount of data. The ``Function`` class keeps track of a list of +BasicBlock_\ s, a list of formal Argument_\ s, and a SymbolTable_. + +The list of BasicBlock_\ s is the most commonly used part of ``Function`` +objects. The list imposes an implicit ordering of the blocks in the function, +which indicates how the code will be laid out by the backend. Additionally, the +first BasicBlock_ is the implicit entry node for the ``Function``. It is not +legal in LLVM to explicitly branch to this initial block. There are no implicit +exit nodes, and in fact there may be multiple exit nodes from a single +``Function``. 
If the BasicBlock_ list is empty, this indicates that the +``Function`` is actually a function declaration: the actual body of the function +hasn't been linked in yet. + +In addition to a list of BasicBlock_\ s, the ``Function`` class also keeps track +of the list of formal Argument_\ s that the function receives. This container +manages the lifetime of the Argument_ nodes, just like the BasicBlock_ list does +for the BasicBlock_\ s. + +The SymbolTable_ is a very rarely used LLVM feature that is only used when you +have to look up a value by name. Aside from that, the SymbolTable_ is used +internally to make sure that there are no conflicts between the names of +Instruction_\ s, BasicBlock_\ s, or Argument_\ s in the function body. + +Note that ``Function`` is a GlobalValue_ and therefore also a Constant_. The +value of the function is its address (after linking) which is guaranteed to be +constant. + +.. _m_Function: + +Important Public Members of the ``Function`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``Function(const FunctionType *Ty, LinkageTypes Linkage, + const std::string &N = "", Module* Parent = 0)`` + + Constructor used when you need to create new ``Function``\ s to add to the + program. The constructor must specify the type of the function to create and + what type of linkage the function should have. The FunctionType_ argument + specifies the formal arguments and return value for the function. The same + FunctionType_ value can be used to create multiple functions. The ``Parent`` + argument specifies the Module in which the function is defined. If this + argument is provided, the function will automatically be inserted into that + module's list of functions. + +* ``bool isDeclaration()`` + + Return whether or not the ``Function`` has a body defined. If the function is + "external", it does not have a body, and thus must be resolved by linking with + a function defined in a different translation unit. 
+ +* | ``Function::iterator`` - Typedef for basic block list iterator + | ``Function::const_iterator`` - Typedef for const_iterator. + | ``begin()``, ``end()``, ``size()``, ``empty()`` + + These are forwarding methods that make it easy to access the contents of a + ``Function`` object's BasicBlock_ list. + +* ``Function::BasicBlockListType &getBasicBlockList()`` + + Returns the list of BasicBlock_\ s. This is necessary to use when you need to + update the list or perform a complex action that doesn't have a forwarding + method. + +* | ``Function::arg_iterator`` - Typedef for the argument list iterator + | ``Function::const_arg_iterator`` - Typedef for const_iterator. + | ``arg_begin()``, ``arg_end()``, ``arg_size()``, ``arg_empty()`` + + These are forwarding methods that make it easy to access the contents of a + ``Function`` object's Argument_ list. + +* ``Function::ArgumentListType &getArgumentList()`` + + Returns the list of Argument_. This is necessary to use when you need to + update the list or perform a complex action that doesn't have a forwarding + method. + +* ``BasicBlock &getEntryBlock()`` + + Returns the entry ``BasicBlock`` for the function. Because the entry block + for the function is always the first block, this returns the first block of + the ``Function``. + +* | ``Type *getReturnType()`` + | ``FunctionType *getFunctionType()`` + + This traverses the Type_ of the ``Function`` and returns the return type of + the function, or the FunctionType_ of the actual function. + +* ``SymbolTable *getSymbolTable()`` + + Return a pointer to the SymbolTable_ for this ``Function``. + +.. _GlobalVariable: + +The ``GlobalVariable`` class +---------------------------- + +``#include "llvm/GlobalVariable.h"`` + +header source: `GlobalVariable.h +`_ + +doxygen info: `GlobalVariable Class +`_ + +Superclasses: GlobalValue_, Constant_, User_, Value_ + +Global variables are represented with the (surprise surprise) ``GlobalVariable`` +class. 
Like functions, ``GlobalVariable``\ s are also subclasses of +GlobalValue_, and as such are always referenced by their address (global values +must live in memory, so their "name" refers to their constant address). See +GlobalValue_ for more on this. Global variables may have an initial value +(which must be a Constant_), and if they have an initializer, they may be marked +as "constant" themselves (indicating that their contents never change at +runtime). + +.. _m_GlobalVariable: + +Important Public Members of the ``GlobalVariable`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes &Linkage, + Constant *Initializer = 0, const std::string &Name = "", Module* Parent = 0)`` + + Create a new global variable of the specified type. If ``isConstant`` is true + then the global variable will be marked as unchanging for the program. The + Linkage parameter specifies the type of linkage (internal, external, weak, + linkonce, appending) for the variable. If the linkage is InternalLinkage, + WeakAnyLinkage, WeakODRLinkage, LinkOnceAnyLinkage or LinkOnceODRLinkage, then + the resultant global variable will have internal linkage. AppendingLinkage + concatenates together all instances (in different translation units) of the + variable into a single variable but is only applicable to arrays. See the + `LLVM Language Reference `_ for further details + on linkage types. Optionally an initializer, a name, and the module to put + the variable into may be specified for the global variable as well. + +* ``bool isConstant() const`` + + Returns true if this is a global variable that is known not to be modified at + runtime. + +* ``bool hasInitializer()`` + + Returns true if this ``GlobalVariable`` has an initializer. + +* ``Constant *getInitializer()`` + + Returns the initial value for a ``GlobalVariable``. It is not legal to call + this method if there is no initializer. + +.. 
_BasicBlock: + +The ``BasicBlock`` class +------------------------ + +``#include "llvm/BasicBlock.h"`` + +header source: `BasicBlock.h +`_ + +doxygen info: `BasicBlock Class +`_ + +Superclass: Value_ + +This class represents a single entry single exit section of the code, commonly +known as a basic block by the compiler community. The ``BasicBlock`` class +maintains a list of Instruction_\ s, which form the body of the block. Matching +the language definition, the last element of this list of instructions is always +a terminator instruction (a subclass of the TerminatorInst_ class). + +In addition to tracking the list of instructions that make up the block, the +``BasicBlock`` class also keeps track of the :ref:`Function ` that +it is embedded into. + +Note that ``BasicBlock``\ s themselves are Value_\ s, because they are +referenced by instructions like branches and can go in the switch tables. +``BasicBlock``\ s have type ``label``. + +.. _m_BasicBlock: + +Important Public Members of the ``BasicBlock`` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``BasicBlock(const std::string &Name = "", Function *Parent = 0)`` + + The ``BasicBlock`` constructor is used to create new basic blocks for + insertion into a function. The constructor optionally takes a name for the + new block, and a :ref:`Function ` to insert it into. If the + ``Parent`` parameter is specified, the new ``BasicBlock`` is automatically + inserted at the end of the specified :ref:`Function `, if not + specified, the BasicBlock must be manually inserted into the :ref:`Function + `. + +* | ``BasicBlock::iterator`` - Typedef for instruction list iterator + | ``BasicBlock::const_iterator`` - Typedef for const_iterator. + | ``begin()``, ``end()``, ``front()``, ``back()``, + ``size()``, ``empty()`` + STL-style functions for accessing the instruction list. 
+ + These methods and typedefs are forwarding functions that have the same + semantics as the standard library methods of the same names. These methods + expose the underlying instruction list of a basic block in a way that is easy + to manipulate. To get the full complement of container operations (including + operations to update the list), you must use the ``getInstList()`` method. + +* ``BasicBlock::InstListType &getInstList()`` + + This method is used to get access to the underlying container that actually + holds the Instructions. This method must be used when there isn't a + forwarding function in the ``BasicBlock`` class for the operation that you + would like to perform. Because there are no forwarding functions for + "updating" operations, you need to use this if you want to update the contents + of a ``BasicBlock``. + +* ``Function *getParent()`` + + Returns a pointer to :ref:`Function ` the block is embedded into, + or a null pointer if it is homeless. + +* ``TerminatorInst *getTerminator()`` + + Returns a pointer to the terminator instruction that appears at the end of the + ``BasicBlock``. If there is no terminator instruction, or if the last + instruction in the block is not a terminator, then a null pointer is returned. + +.. _Argument: + +The ``Argument`` class +---------------------- + +This subclass of Value defines the interface for incoming formal arguments to a +function. A Function maintains a list of its formal arguments. An argument has +a pointer to the parent Function. + + diff --git a/docs/Projects.rst b/docs/Projects.rst index 63132887a599..3246e3ff169b 100644 --- a/docs/Projects.rst +++ b/docs/Projects.rst @@ -1,5 +1,3 @@ -.. _projects: - ======================== Creating an LLVM Project ======================== @@ -153,12 +151,10 @@ Underneath your top level directory, you should have the following directories: Currently, the LLVM build system provides basic support for tests. 
The LLVM system provides the following: -* LLVM provides a ``tcl`` procedure that is used by ``Dejagnu`` to run tests. - It can be found in ``llvm/lib/llvm-dg.exp``. This test procedure uses ``RUN`` +* LLVM contains regression tests in ``llvm/test``. These tests are run by the + :doc:`Lit ` testing tool. This test procedure uses ``RUN`` lines in the actual test case to determine how to run the test. See the - `TestingGuide `_ for more details. You can easily write - Makefile support similar to the Makefiles in ``llvm/test`` to use ``Dejagnu`` - to run your project's tests. + :doc:`TestingGuide` for more details. * LLVM contains an optional package called ``llvm-test``, which provides benchmarks and programs that are known to compile with the Clang front diff --git a/docs/README.txt b/docs/README.txt index 5ddd599d8a78..22cf93077959 100644 --- a/docs/README.txt +++ b/docs/README.txt @@ -1,12 +1,42 @@ LLVM Documentation ================== -The LLVM documentation is currently written in two formats: +LLVM's documentation is written in reStructuredText, a lightweight +plaintext markup language (file extension `.rst`). While the +reStructuredText documentation should be quite readable in source form, it +is mostly meant to be processed by the Sphinx documentation generation +system to create HTML pages which are hosted on and +updated after every commit. Manpage output is also supported, see below. - * Plain HTML documentation. +If you instead would like to generate and view the HTML locally, install +Sphinx and then do: - * reStructured Text documentation using the Sphinx documentation generator. It - is currently tested with Sphinx 1.1.3. + cd docs/ + make -f Makefile.sphinx + $BROWSER _build/html/index.html - For more information, see the "Sphinx Introduction for LLVM Developers" - document. +The mapping between reStructuredText files and generated documentation is +`docs/Foo.rst` <-> `_build/html/Foo.html` <-> `http://llvm.org/docs/Foo.html`. 
+ +If you are interested in writing new documentation, you will want to read +`SphinxQuickstartTemplate.rst` which will get you writing documentation +very fast and includes examples of the most important reStructuredText +markup syntax. + +Manpage Output +=============== + +Building the manpages is similar to building the HTML documentation. The +primary difference is to use the `man` makefile target, instead of the +default (which is `html`). Sphinx then produces the man pages in the +directory `_build/man/`. + + cd docs/ + make -f Makefile.sphinx man + man -l _build/man/FileCheck.1 + +The correspondence between .rst files and man pages is +`docs/CommandGuide/Foo.rst` <-> `_build/man/Foo.1`. +These .rst files are also included during HTML generation so they are also +viewable online (as noted above) at e.g. +`http://llvm.org/docs/CommandGuide/Foo.html`. diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html deleted file mode 100644 index a4c5960c1555..000000000000 --- a/docs/ReleaseNotes.html +++ /dev/null @@ -1,975 +0,0 @@ - - - - - - LLVM 3.2 Release Notes - - - -

LLVM 3.2 Release Notes

- -
-LLVM Dragon Logo -
- -
    -
  1. Introduction
  2. -
  3. Sub-project Status Update
  4. -
  5. External Projects Using LLVM 3.2
  6. -
  7. What's New in LLVM?
  8. -
  9. Installation Instructions
  10. -
  11. Known Problems
  12. -
  13. Additional Information
  14. -
- -
-

Written by the LLVM Team

-
- - -

- Introduction -

- - -
- -

This document contains the release notes for the LLVM Compiler - Infrastructure, release 3.2. Here we describe the status of LLVM, including - major improvements from the previous release, improvements in various - sub-projects of LLVM, and some of the current users of the code. All LLVM - releases may be downloaded from the LLVM - releases web site.

- -

For more information about LLVM, including information about the latest - release, please check out the main LLVM web - site. If you have questions or comments, - the LLVM - Developer's Mailing List is a good place to send them.

- -

Note that if you are reading this file from a Subversion checkout or the main - LLVM web page, this document applies to the next release, not the - current one. To see the release notes for a specific release, please see the - releases page.

- -
- - - -

- Sub-project Status Update -

- - -
- -

The LLVM 3.2 distribution currently consists of production-quality code - from the core LLVM repository, which roughly includes the LLVM optimizers, - code generators and supporting tools, as well as Clang, DragonEgg and - compiler-rt sub-project repositories. In addition to this code, the LLVM - Project includes other sub-projects that are in development. Here we - include updates on these sub-projects.

- - -

-Clang: C/C++/Objective-C Frontend Toolkit -

- -
- -

Clang is an LLVM front end for the C, - C++, and Objective-C languages. Clang aims to provide a better user - experience through expressive diagnostics, a high level of conformance to - language standards, fast compilation, and low memory use. Like LLVM, Clang - provides a modular, library-based architecture that makes it suitable for - creating or integrating with other development tools.

- -

In the LLVM 3.2 time-frame, the Clang team has made many improvements. - Highlights include:

-
    -
  • Improvements to Clang's diagnostics
  • -
  • Support for tls_model attribute
  • -
  • Type safety attributes
  • -
- -

For more details about the changes to Clang since the 3.1 release, see the - Clang 3.2 release - notes.

- -

If Clang rejects your code but another compiler accepts it, please take a - look at the language - compatibility guide to make sure this is not intentional or a known - issue.

- -
- - -

-DragonEgg: GCC front-ends, LLVM back-end -

- -
- -

DragonEgg is a - gcc plugin that replaces GCC's - optimizers and code generators with LLVM's. It works with gcc-4.5 and gcc-4.6 - (and partially with gcc-4.7), can target the x86-32/x86-64 and ARM processor - families, and has been successfully used on the Darwin, FreeBSD, KFreeBSD, - Linux and OpenBSD platforms. It fully supports Ada, C, C++ and Fortran. It - has partial support for Go, Java, Obj-C and Obj-C++.

- -

The 3.2 release has the following notable changes:

- -
    -
  • Able to load LLVM plugins such as Polly.
  • -
  • Supports thread-local storage models.
  • -
  • Passes knowledge of variable lifetimes to the LLVM optimizers.
  • -
  • No longer requires GCC to be built with LTO support.
  • -
- -
- - -

-compiler-rt: Compiler Runtime Library -

- -
- - -

The LLVM compiler-rt project - is a simple library that provides an implementation of the low-level - target-specific hooks required by code generation and other runtime - components. For example, when compiling for a 32-bit target, converting a - double to a 64-bit unsigned integer is compiled into a runtime call to the - __fixunsdfdi function. The compiler-rt library provides highly - optimized implementations of this and other low-level routines (some are 3x - faster than the equivalent libgcc routines).

- -

The 3.2 release has the following notable changes:

- -
    -
  • ThreadSanitizer (TSan) - data race detector run-time library for C/C++ has been added.
  • -
  • Improvements to AddressSanitizer including: better portability - (OSX, Android NDK), support for cmake based builds, enhanced error reporting and lots of bug fixes.
  • -
  • Added support for A6 'Swift' CPU.
  • -
  • divsi3 function has been enhanced to take advantage of a hardware unsigned divide when it is available.
  • -
- -
- - -

-LLDB: Low Level Debugger -

- -
- -

LLDB is a ground-up implementation of a - command line debugger, as well as a debugger API that can be used from other - applications. LLDB makes use of the Clang parser to provide high-fidelity - expression parsing (particularly for C++) and uses the LLVM JIT for target - support.

- -

The 3.2 release has the following notable changes:

- -
    -
  • Linux build fixes for clang (see Building LLDB)
  • -
  • Some Linux stability and usability improvements
  • -
  • Switch expression evaluation to use MCJIT (from legacy JIT) on Linux
  • -
- -
- - -

-libc++: C++ Standard Library -

- -
- -

Like compiler_rt, libc++ is now dual - licensed under the MIT and UIUC license, allowing it to be used more - permissively.

- -

Within the LLVM 3.2 time-frame there were the following highlights:

- -
    -
  • C++11 shared_ptr atomic access API (20.7.2.5) has been implemented.
  • -
  • Applied noexcept and constexpr throughout library.
  • -
  • Improved C++11 conformance in associative container emplace.
  • -
  • Performance improvements in: std::rotate algorithm and I/O.
  • -
  • Operator new/delete and type_infos for exception types moved from libc++ to libc++abi.
  • -
  • Bug fixes in: <atomic>; vector<bool> algorithms, - <future>,<tuple>, - <type_traits>,<fstream>,<istream>, - <iterator>, <condition_variable>,<complex> as well as visibility fixes. -
- -
- - -

-VMKit -

- -
- -

The VMKit project is an implementation - of a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and - just-in-time compilation.

- -

The 3.2 release has the following notable changes:

- -
    -
  • Bug fixes only, no functional changes.
  • -
- -
- - - -

-Polly: Polyhedral Optimizer -

- -
- -

Polly is an experimental - optimizer for data locality and parallelism. It currently provides high-level - loop optimizations and automatic parallelization (using the OpenMP run time). - Work in the area of automatic SIMD and accelerator code generation was - started.

- -

Within the LLVM 3.2 time-frame there were the following highlights:

- -
    -
  • isl, the integer set library used by Polly, was relicensed under the MIT license.
  • -
  • isl based code generation.
  • -
  • MIT licensed replacement for CLooG (LGPLv2).
  • -
  • Fine grained option handling (separation of core and border computations, control overhead vs. code size).
  • -
  • Support for FORTRAN and Dragonegg.
  • -
  • OpenMP code generation fixes.
  • -
- -
- - -

-Clang Static Analyzer -

- -
- -

The Clang Static Analyzer - is an advanced source code analysis tool integrated into Clang that performs - a deep analysis of code to find potential bugs.

- -

In the LLVM 3.2 release, the static analyzer has made significant improvements - in many areas, with notable highlights such as:

- -
    -
  • Improved interprocedural analysis within a translation unit (see details below), which greatly amplified the analyzer's ability to find bugs.
  • -
  • New infrastructure to model "well-known" APIs, allowing the analyzer to do a much better job when modeling calls to such functions.
  • -
  • Significant improvements to the APIs to write static analyzer checkers, with a more unified way of representing function/method calls in the checker API. Details can be found in the Building a Checker in 24 hours talk. -
- -

The release specifically includes notable improvements for Objective-C analysis, including:

- -
    -
  • Interprocedural analysis for Objective-C methods.
  • -
  • Interprocedural analysis of calls to "blocks".
  • -
  • Precise modeling of GCD APIs such as dispatch_once and friends.
  • -
  • Improved support for recently added Objective-C constructs such as array and dictionary literals.
  • -
- -

The release specifically includes notable improvements for C++ analysis, including:

- -
    -
  • Interprocedural analysis for C++ methods (within a translation unit).
  • -
  • More precise modeling of C++ initializers and destructors.
  • -
- -

Finally, this release includes many small improvements to scan-build, which can be used to drive the analyzer from the command line or a continuous integration system. This includes a directory-traversal issue, which could cause potential security problems in some cases. We would like to acknowledge Tim Brown of Portcullis Computer Security Ltd for reporting this issue.

- -
- -
- - -

- External Open Source Projects Using LLVM 3.2 -

- - -
- -

An exciting aspect of LLVM is that it is used as an enabling technology for - a lot of other language and tools projects. This section lists some of the - projects that have already been updated to work with LLVM 3.2.

- -

Crack

- -
- -

Crack aims to provide - the ease of development of a scripting language with the performance of a - compiled language. The language derives concepts from C++, Java and Python, - incorporating object-oriented programming, operator overloading and strong - typing.

- -
- -

EmbToolkit

- -
- -

EmbToolkit provides Linux cross-compiler - toolchain/SDK (GCC/binutils/C library (uclibc,eglibc,musl)), a build system for - package cross-compilation and optionally various root file systems. - It supports ARM and MIPS. There is an ongoing effort to provide a clang+llvm - environment for the 3.2 releases, -

- -
- -

FAUST

- -
- -

FAUST is a compiled language for - real-time audio signal processing. The name FAUST stands for Functional - AUdio STream. Its programming model combines two approaches: functional - programming and block diagram composition. In addition with the C, C++, Java, - JavaScript output formats, the Faust compiler can generate LLVM bitcode, and - works with LLVM 2.7-3.2.

- -
- -

Glasgow Haskell Compiler (GHC)

- -
- -

GHC is an open source compiler and - programming suite for Haskell, a lazy functional programming language. It - includes an optimizing static compiler generating good code for a variety of - platforms, together with an interactive system for convenient, quick - development.

- -

GHC 7.0 and onwards include an LLVM code generator, supporting LLVM 2.8 and - later.

- -
- -

Julia

- -
- -

Julia is a high-level, - high-performance dynamic language for technical computing. It provides a - sophisticated compiler, distributed parallel execution, numerical accuracy, - and an extensive mathematical function library. The compiler uses type - inference to generate fast code without any type declarations, and uses - LLVM's optimization passes and JIT compiler. The - Julia Language is designed - around multiple dispatch, giving programs a large degree of flexibility. It - is ready for use on many kinds of problems.

- -
- -

LLVM D Compiler

- -
- -

LLVM D Compiler (LDC) is - a compiler for the D programming Language. It is based on the DMD frontend - and uses LLVM as backend.

- -
- -

Open Shading Language

- -
- -

Open Shading - Language (OSL) is a small but rich language for programmable shading in - advanced global illumination renderers and other applications, ideal for - describing materials, lights, displacement, and pattern generation. It uses - LLVM to JIT complex shader networks to x86 code at runtime.

- -

OSL was developed by Sony Pictures Imageworks for use in its in-house - renderer used for feature film animation and visual effects, and is - distributed as open source software with the "New BSD" license. - It has been used for all the shading on such films as The Amazing Spider-Man, - Men in Black III, Hotel Transylvania, and many other films in-progress, - and also has been incorporated into several commercial and open source - rendering products such as Blender, VRay, and Autodesk Beast.

- -
- -

Portable OpenCL (pocl)

- -
- -

In addition to producing an easily portable open source OpenCL - implementation, another major goal of - pocl is improving performance portability of OpenCL programs with - compiler optimizations, reducing the need for target-dependent manual - optimizations. An important part of pocl is a set of LLVM passes used to - statically parallelize multiple work-items with the kernel compiler, even in - the presence of work-group barriers. This enables static parallelization of - the fine-grained static concurrency in the work groups in multiple ways - (SIMD, VLIW, superscalar,...).

- -
- -

Pure

- -
- -

Pure is an - algebraic/functional programming language based on term rewriting. Programs - are collections of equations which are used to evaluate expressions in a - symbolic fashion. The interpreter uses LLVM as a backend to JIT-compile Pure - programs to fast native code. Pure offers dynamic typing, eager and lazy - evaluation, lexical closures, a hygienic macro system (also based on term - rewriting), built-in list and matrix support (including list and matrix - comprehensions) and an easy-to-use interface to C and other programming - languages (including the ability to load LLVM bitcode modules, and inline C, - C++, Fortran and Faust code in Pure programs if the corresponding - LLVM-enabled compilers are installed).

- -

Pure version 0.56 has been tested and is known to work with LLVM 3.2 (and - continues to work with older LLVM releases >= 2.5).

- -
- -

TTA-based Co-design Environment (TCE)

- -
- -

TCE is a toolset for designing - application-specific processors (ASP) based on the Transport triggered - architecture (TTA). The toolset provides a complete co-design flow from C/C++ - programs down to synthesizable VHDL/Verilog and parallel program binaries. - Processor customization points include the register files, function units, - supported operations, and the interconnection network.

- -

TCE uses Clang and LLVM for C/C++ language support, target independent - optimizations and also for parts of code generation. It generates new - LLVM-based code generators "on the fly" for the designed TTA processors and - loads them in to the compiler backend as runtime libraries to avoid - per-target recompilation of larger parts of the compiler chain.

- -
- -
- - -

- What's New in LLVM 3.2? -

- - -
- -

This release includes a huge number of bug fixes, performance tweaks and - minor improvements. Some of the major improvements and new features are - listed in this section.

- - -

-Major New Features -

- -
- - - - - -

LLVM 3.2 includes several major changes and big features:

- -
    -
  • Loop Vectorizer.
  • -
  • New implementation of SROA.
  • -
  • New NVPTX back-end (replacing existing PTX back-end) based on NVIDIA sources.
  • -
- -
- - - -

-LLVM IR and Core Improvements -

- -
- -

LLVM IR has several new features for better support of new targets and that - expose new optimization opportunities:

- -
    -
  • Thread local variables may have a specified TLS model. See the - Language Reference Manual.
  • -
  • 'TYPE_CODE_FUNCTION_OLD' type code and autoupgrade code for old function attributes format has been removed.
  • -
  • Internal representation of the Attributes class has been converted into a pointer to an - opaque object that's uniqued by and stored in the LLVMContext object. - The Attributes class then becomes a thin wrapper around this opaque object.
  • -
- -
- - -

-Optimizer Improvements -

- -
- -

In addition to many minor performance tweaks and bug fixes, this release - includes a few major enhancements and additions to the optimizers:

- -

Loop Vectorizer - We've added a loop vectorizer and we are now able to - vectorize small loops. The loop vectorizer is disabled by default and - can be enabled using the -mllvm -vectorize-loops flag. - The SIMD vector width can be specified using the flag - -mllvm -force-vector-width=4. - The default value is 0 which means auto-select. -
- We can now vectorize this function: - -

-    unsigned sum_arrays(int *A, int *B, int start, int end) {
-      unsigned sum = 0;
-      for (int i = start; i < end; ++i)
-        sum += A[i] + B[i] + i;
-
-      return sum;
-    }
-    
- - We vectorize loops under the following conditions: -
    -
  • The inner most loops must have a single basic block.
  • -
  • The number of iterations is known before the loop starts to execute.
  • -
  • The loop counter needs to be incremented by one.
  • -
  • The loop trip count can be a variable.
  • -
  • Loops do not need to start at zero.
  • -
  • The induction variable can be used inside the loop.
  • -
  • Loop reductions are supported.
  • -
  • Arrays with affine access pattern do not need to be marked as 'noalias' and are checked at runtime.
  • -
- -

- -

SROA - We’ve re-written SROA to be significantly more powerful and generate -code which is much more friendly to the rest of the optimization pipeline. -Previously this pass had scaling problems that required it to only operate on -relatively small aggregates, and at times it would mistakenly replace a large -aggregate with a single very large integer in order to make it a scalar SSA -value. The result was a large number of i1024 and i2048 values representing any -small stack buffer. These in turn slowed down many subsequent optimization -paths.

-

The new SROA pass uses a different algorithm that allows it to only promote to -scalars the pieces of the aggregate actively in use. Because of this it doesn’t -require any thresholds. It also always deduces the scalar values from the uses -of the aggregate rather than the specific LLVM type of the aggregate. These -features combine to both optimize more code with the pass and to improve the -compile time of many functions dramatically.

- -
    -
  • Branch weight metadata is preserved through more of the optimizer.
  • -
- -
- - -

-MC Level Improvements -

- -
- -

The LLVM Machine Code (aka MC) subsystem was created to solve a number of - problems in the realm of assembly, disassembly, object file format handling, - and a number of other related areas that CPU instruction-set level tools work - in. For more information, please see the - Intro - to the LLVM MC Project Blog Post.

- -
    -
  • Added support for following assembler directives: .ifb, .ifnb, .ifc, - .ifnc, .purgem, .rept and .version (ELF) as well as Darwin specific - .pushsection, .popsection and .previous .
  • -
  • Enhanced handling of .lcomm directive.
  • -
  • MS style inline assembler: added implementation of the offset and TYPE operators.
  • -
  • Targets can specify minimum supported NOP size for NOP padding.
  • -
  • ELF improvements: added support for generating ELF objects on Windows.
  • -
  • MachO improvements: symbol-difference variables are marked as N_ABS, added direct-to-object attribute for data-in-code markers.
  • -
  • Added support for annotated disassembly output for x86 and arm targets.
  • -
  • ARM support has been improved by adding support for ARM TARGET2 relocation - and fixing handling of ARM-style "$d.*" labels.
  • -
  • Implemented local-exec TLS on PowerPC.
  • -
- -
- - -

-Target Independent Code Generator Improvements -

- -
- -

Stack Coloring - We have implemented a new optimization pass - to merge stack objects which are used in disjoint areas of the code. - This optimization reduces the required stack space significantly, in cases - where it is clear to the optimizer that the stack slot is not shared. - We use the lifetime markers to tell the codegen that a certain alloca - is used within a region.

- -

We now merge consecutive loads and stores.

- -

We have put a significant amount of work into the code generator - infrastructure, which allows us to implement more aggressive algorithms and - make it run faster:

- -

We added new TableGen infrastructure to support bundling for - Very Long Instruction Word (VLIW) architectures. TableGen can now - automatically generate a deterministic finite automaton from a VLIW - target's schedule description which can be queried to determine - legal groupings of instructions in a bundle.

- -

We have added a new target independent VLIW packetizer based on the - DFA infrastructure to group machine instructions into bundles.

- -

We have added new TableGen infrastructure to support relationship maps - between instructions. This feature enables TableGen to automatically - construct a set of relation tables and query functions that can be used - to switch between various forms of instructions. For more information, - please refer to - How To Use Instruction Mappings.

- -
- -

-Basic Block Placement -

- -
- -

A probability based block placement and code layout algorithm was added to - LLVM's code generator. This layout pass supports probabilities derived from - static heuristics as well as source code annotations such as - __builtin_expect.

- -
- - -

-X86-32 and X86-64 Target Improvements -

- -
- -

New features and major changes in the X86 target include:

- -
    -
  • Small codegen optimizations, especially for AVX2.
  • -
- -
- - -

-ARM Target Improvements -

- -
- -

New features of the ARM target include:

- -
    -
  • Support and performance tuning for the A6 'Swift' CPU.
  • -
- - - -

-ARM Integrated Assembler -

- -
- -

The ARM target now includes a full featured macro assembler, including - direct-to-object module support for clang. The assembler is currently enabled - by default for Darwin only pending testing and any additional necessary - platform specific support for Linux.

- -

Full support is included for Thumb1, Thumb2 and ARM modes, along with - sub-target and CPU specific extensions for VFP2, VFP3 and NEON.

- -

The assembler is Unified Syntax only (see ARM Architectural Reference Manual - for details). While there is some, and growing, support for pre-unified - (divided) syntax, there are still significant gaps in that support.

- -
- -
- - -

-MIPS Target Improvements -

- -
- -

New features and major changes in the MIPS target include:

- -
    -
  • Integrated assembler support: - MIPS32 works for both PIC and static, known limitation is the PR14456 where - R_MIPS_GPREL16 relocation is generated with the wrong addend. - MIPS64 support is incomplete, for example exception handling is not working.
  • -
  • Support for fast calling convention has been added.
  • -
  • Support for Android MIPS toolchain has been added to clang driver.
  • -
  • Added clang driver support for MIPS N32 ABI through "-mabi=n32" option.
  • -
  • MIPS32 and MIPS64 disassembler has been implemented.
  • -
  • Support for compiling programs with large GOTs (exceeding 64kB in size) has been added - through llc option "-mxgot".
  • -
  • Added experimental support for MIPS32 DSP intrinsics.
  • -
  • Experimental support for MIPS16 with the following limitations: only soft float is supported, - C++ exceptions are not supported, large stack frames (> 32000 bytes) are not supported, - direct object code emission is not supported (only .s assembly output is available).
  • -
  • Standalone assembler (llvm-mc): implementation is in progress and considered experimental.
  • -
  • All classic JIT and MCJIT tests pass on Little and Big Endian MIPS32 platforms.
  • -
  • Inline asm support: all common constraints and operand modifiers have been implemented.
  • -
  • Added tail call optimization support, use llc option "-enable-mips-tail-calls" - or clang options "-mllvm -enable-mips-tail-calls" to enable it.
  • -
  • Improved register allocation by removing registers $fp, $gp, $ra and $at from the list of reserved registers.
  • -
  • Long branch expansion pass has been implemented, which expands branch - instructions with offsets that do not fit in the 16-bit field.
  • -
  • Cavium Octeon II board is used for testing builds (llvm-mips-linux builder).
  • -
- -
- - -

-PowerPC Target Improvements -

- -
- -

Many fixes and changes across LLVM (and Clang) for better compliance with - the 64-bit PowerPC ELF Application Binary Interface, interoperability with - GCC, and overall 64-bit PowerPC support. Some highlights include:

-
    -
  • MCJIT support added.
  • -
  • PPC64 relocation support and (small code model) TOC handling - added.
  • -
  • Parameter passing and return value fixes (alignment issues, - padding, varargs support, proper register usage, odd-sized - structure support, float support, extension of return values - for i32 return values).
  • -
  • Fixes in spill and reload code for vector registers.
  • -
  • C++ exception handling enabled.
  • -
  • Changes to remediate double-rounding compatibility issues with - respect to GCC behavior.
  • -
  • Refactoring to disentangle ppc64-elf-linux ABI from Darwin - ppc64 ABI support.
  • -
  • Assorted new test cases and test case fixes (endian and word - size issues).
  • -
  • Fixes for big-endian codegen bugs, instruction encodings, and - instruction constraints.
  • -
  • Implemented -integrated-as support.
  • -
  • Additional support for Altivec compare operations.
  • -
  • IBM long double support.
  • -
-

There have also been code generation improvements for both 32- and 64-bit - code. Instruction scheduling support for the Freescale e500mc and e5500 - cores has been added.

- -
- - -

-PTX/NVPTX Target Improvements -

- -
- -

The PTX back-end has been replaced by the NVPTX back-end, which is based on - the LLVM back-end used by NVIDIA in their CUDA (nvcc) and OpenCL compiler. - Some highlights include:

-
    -
  • Compatibility with PTX 3.1 and SM 3.5
  • -
  • Support for NVVM intrinsics as defined in the NVIDIA Compiler SDK
  • -
  • Full compatibility with old PTX back-end, with much greater coverage of - LLVM IR
  • -
- -

Please submit any back-end bugs to the LLVM Bugzilla site.

- -
- - -

-Other Target Specific Improvements -

- -
- -
    -
  • Added support for custom names for library functions in TargetLibraryInfo.
  • -
- -
- - -

-Major Changes and Removed Features -

- -
- -

If you're already an LLVM user or developer with out-of-tree changes based on - LLVM 3.2, this section lists some "gotchas" that you may run into upgrading - from the previous release.

- -
    -
  • llvm-ld and llvm-stub have been removed, llvm-ld functionality can be partially replaced by - llvm-link | opt | {llc | as, llc -filetype=obj} | ld, or fully replaced by Clang.
  • -
  • MCJIT: added support for inline assembly (requires asm parser), added faux remote target execution to lli option '-remote-mcjit'.
  • -
- -
- - -

-Internal API Changes -

- -
- -

In addition, many APIs have changed in this release. Some of the major - LLVM API changes are:

- -

We've added a new interface for allowing IR-level passes to access - target-specific information. A new IR-level pass, called - "TargetTransformInfo" provides a number of low-level interfaces. - LSR and LowerInvoke already use the new interface.

- -

The TargetData structure has been renamed to DataLayout and moved to VMCore -to remove a dependency on Target.

- -
- - -

-Tools Changes -

- -
- -

In addition, some tools have changed in this release. Some of the changes are:

- -
    -
  • opt: added support for '-mtriple' option.
  • -
  • llvm-mc : - added '-disassemble' support for '-show-inst' and '-show-encoding' options, added '-edis' option to produce annotated - disassembly output for X86 and ARM targets.
  • -
  • libprofile: allows the profile data file name to be specified by the LLVMPROF_OUTPUT environment variable.
  • -
  • llvm-objdump: has been changed to display available targets, '-arch' option accepts x86 and x86-64 as valid arch names.
  • -
  • llc and opt: added FMA formation from pairs of FADD + FMUL or FSUB + FMUL enabled by option '-enable-excess-fp-precision' or option '-enable-unsafe-fp-math', - option '-fp-contract' controls the creation by optimizations of fused FP by selecting Fast, Standard, or Strict mode.
  • -
  • llc: object file output from llc is no longer considered experimental.
  • -
  • gold plugin: handles Position Independent Executables.
  • -
- -
- - - -

- Known Problems -

- - -
- -

LLVM is generally a production quality compiler, and is used by a broad range - of applications and shipping in many products. That said, not every - subsystem is as mature as the aggregate, particularly the more obscure - targets. If you run into a problem, please check - the LLVM bug database and submit a bug if - there isn't already one or ask on - the LLVMdev - list.

- -

Known problem areas include:

- -
    -
  • The CellSPU, MSP430, and XCore backends are experimental, and the CellSPU backend will be removed in LLVM 3.3.
  • - -
  • The integrated assembler, disassembler, and JIT are not supported by - several targets. If an integrated assembler is not supported, then a - system assembler is required. For more details, see the Target Features Matrix. -
  • -
- -
- - -

- Additional Information -

- - -
- -

A wide variety of additional information is available on - the LLVM web page, in particular in - the documentation section. The web page - also contains versions of the API documentation which is up-to-date with the - Subversion version of the source code. You can access versions of these - documents specific to this release by going into the "llvm/doc/" - directory in the LLVM tree.

- -

If you have any questions or comments about LLVM, please feel free to contact - us via the mailing lists.

- -
- - - -
-
- Valid CSS - Valid HTML 4.01 - - LLVM Compiler Infrastructure
- Last modified: $Date: 2012-12-19 11:50:28 +0100 (Wed, 19 Dec 2012) $ -
- - - diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst new file mode 100644 index 000000000000..3ca556025494 --- /dev/null +++ b/docs/ReleaseNotes.rst @@ -0,0 +1,144 @@ +====================== +LLVM 3.3 Release Notes +====================== + +.. contents:: + :local: + +.. warning:: + These are in-progress notes for the upcoming LLVM 3.3 release. You may + prefer the `LLVM 3.2 Release Notes `_. + + +Introduction +============ + +This document contains the release notes for the LLVM Compiler Infrastructure, +release 3.3. Here we describe the status of LLVM, including major improvements +from the previous release, improvements in various subprojects of LLVM, and +some of the current users of the code. All LLVM releases may be downloaded +from the `LLVM releases web site `_. + +For more information about LLVM, including information about the latest +release, please check out the `main LLVM web site `_. If you +have questions or comments, the `LLVM Developer's Mailing List +`_ is a good place to send +them. + +Note that if you are reading this file from a Subversion checkout or the main +LLVM web page, this document applies to the *next* release, not the current +one. To see the release notes for a specific release, please see the `releases +page `_. + +Non-comprehensive list of changes in this release +================================================= + +.. NOTE + For small 1-3 sentence descriptions, just add an entry at the end of + this list. If your description won't fit comfortably in one bullet + point (e.g. maybe you would like to give an example of the + functionality, or simply have a lot to talk about), see the `NOTE` below + for adding a new subsection. + +* The CellSPU port has been removed. It can still be found in older versions. + +* The IR-level extended linker APIs (for example, to link bitcode files out of + archives) have been removed. Any existing clients of these features should + move to using a linker with integrated LTO support. 
+ +* LLVM and Clang's documentation has been migrated to the `Sphinx + `_ documentation generation system which uses + easy-to-write reStructuredText. See `llvm/docs/README.txt` for more + information. + +* TargetTransformInfo (TTI) is a new interface that can be used by IR-level + passes to obtain target-specific information, such as the costs of + instructions. Only "Lowering" passes such as LSR and the vectorizer are + allowed to use the TTI infrastructure. + +* We've improved the X86 and ARM cost model. + +* The Attributes classes have been completely rewritten and expanded. They now + support not only enumerated attributes and alignments, but "string" + attributes, which are useful for passing information to code generation. See + :doc:`HowToUseAttributes` for more details. + +* TableGen's syntax for instruction selection patterns has been simplified. + Instead of specifying types indirectly with register classes, you should now + specify types directly in the input patterns. See ``SparcInstrInfo.td`` for + examples of the new syntax. The old syntax using register classes still + works, but it will be removed in a future LLVM release. + +* ... next change ... + +.. NOTE + If you would like to document a larger change, then you can add a + subsection about it right here. You can copy the following boilerplate + and un-indent it (the indentation causes it to be inside this comment). + + Special New Feature + ------------------- + + Makes programs 10x faster by doing Special New Thing. + +AArch64 target +-------------- + +We've added support for AArch64, ARM's 64-bit architecture. Development is still +in fairly early stages, but we expect successful compilation when: + +- compiling standard compliant C99 and C++03 with Clang; +- using Linux as a target platform; +- where code + static data doesn't exceed 4GB in size (heap allocated data has + no limitation). 
+ +Some additional functionality is also implemented, notably DWARF debugging, +GNU-style thread local storage and inline assembly. + +Hexagon Target +-------------- + +- Removed support for legacy hexagonv2 and hexagonv3 processor + architectures which are no longer in use. Currently supported + architectures are hexagonv4 and hexagonv5. + +Loop Vectorizer +--------------- + +We've continued the work on the loop vectorizer. The loop vectorizer now +has the following features: + +- Loops with unknown trip count. +- Runtime checks of pointers +- Reductions, Inductions +- If Conversion +- Pointer induction variables +- Reverse iterators +- Vectorization of mixed types +- Vectorization of function calls +- Partial unrolling during vectorization + +R600 Backend +------------ + +The R600 backend was added in this release, it supports AMD GPUs +(HD2XXX - HD7XXX). This backend is used in AMD's Open Source +graphics / compute drivers which are developed as part of the `Mesa3D +`_ project. + + + +Additional Information +====================== + +A wide variety of additional information is available on the `LLVM web page +`_, in particular in the `documentation +`_ section. The web page also contains versions of the +API documentation which is up-to-date with the Subversion version of the source +code. You can access versions of these documents specific to this release by +going into the ``llvm/docs/`` directory in the LLVM tree. + +If you have any questions or comments about LLVM, please feel free to contact +us via the `mailing lists `_. + diff --git a/docs/SegmentedStacks.rst b/docs/SegmentedStacks.rst index f97d62abda04..e44ce42313cb 100644 --- a/docs/SegmentedStacks.rst +++ b/docs/SegmentedStacks.rst @@ -1,5 +1,3 @@ -.. 
_segmented_stacks: - ======================== Segmented Stacks in LLVM ======================== diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html deleted file mode 100644 index 1dcee54f0bf9..000000000000 --- a/docs/SourceLevelDebugging.html +++ /dev/null @@ -1,2858 +0,0 @@ - - - - - Source Level Debugging with LLVM - - - - -

Source Level Debugging with LLVM

- - - - -
- -
- -
-

Written by Chris Lattner - and Jim Laskey

-
- - - -

Introduction

- - -
- -

This document is the central repository for all information pertaining to - debug information in LLVM. It describes the actual format - that the LLVM debug information takes, which is useful for those - interested in creating front-ends or dealing directly with the information. - Further, this document provides specific examples of what debug information - for C/C++ looks like.

- - -

- Philosophy behind LLVM debugging information -

- -
- -

The idea of the LLVM debugging information is to capture how the important - pieces of the source-language's Abstract Syntax Tree map onto LLVM code. - Several design aspects have shaped the solution that appears here. The - important ones are:

- -
    -
  • Debugging information should have very little impact on the rest of the - compiler. No transformations, analyses, or code generators should need to - be modified because of debugging information.
  • - -
  • LLVM optimizations should interact in well-defined and - easily described ways with the debugging information.
  • - -
  • Because LLVM is designed to support arbitrary programming languages, - LLVM-to-LLVM tools should not need to know anything about the semantics of - the source-level-language.
  • - -
  • Source-level languages are often widely different from one another. - LLVM should not put any restrictions on the flavor of the source-language, - and the debugging information should work with any language.
  • - -
  • With code generator support, it should be possible to use an LLVM compiler - to compile a program to native machine code and standard debugging - formats. This allows compatibility with traditional machine-code level - debuggers, like GDB or DBX.
  • -
- -

The approach used by the LLVM implementation is to use a small set - of intrinsic functions to define a - mapping between LLVM program objects and the source-level objects. The - description of the source-level program is maintained in LLVM metadata - in an implementation-defined format - (the C/C++ front-end currently uses working draft 7 of - the DWARF 3 - standard).

- -

When a program is being debugged, a debugger interacts with the user and - turns the stored debug information into source-language specific information. - As such, a debugger must be aware of the source-language, and is thus tied to - a specific language or family of languages.

- -
- - -

- Debug information consumers -

- -
- -

The role of debug information is to provide meta information normally - stripped away during the compilation process. This meta information provides - an LLVM user a relationship between generated code and the original program - source code.

- -

Currently, debug information is consumed by DwarfDebug to produce dwarf - information used by the gdb debugger. Other targets could use the same - information to produce stabs or other debug forms.

- -

It would also be reasonable to use debug information to feed profiling tools - for analysis of generated code, or, tools for reconstructing the original - source from generated code.

- -

TODO - expound a bit more.

- -
- - -

- Debugging optimized code -

- -
- -

An extremely high priority of LLVM debugging information is to make it - interact well with optimizations and analysis. In particular, the LLVM debug - information provides the following guarantees:

- -
    -
  • LLVM debug information always provides information to accurately read - the source-level state of the program, regardless of which LLVM - optimizations have been run, and without any modification to the - optimizations themselves. However, some optimizations may impact the - ability to modify the current state of the program with a debugger, such - as setting program variables, or calling functions that have been - deleted.
  • - -
  • As desired, LLVM optimizations can be upgraded to be aware of the LLVM - debugging information, allowing them to update the debugging information - as they perform aggressive optimizations. This means that, with effort, - the LLVM optimizers could optimize debug code just as well as non-debug - code.
  • - -
  • LLVM debug information does not prevent optimizations from - happening (for example inlining, basic block reordering/merging/cleanup, - tail duplication, etc).
  • - -
  • LLVM debug information is automatically optimized along with the rest of - the program, using existing facilities. For example, duplicate - information is automatically merged by the linker, and unused information - is automatically removed.
  • -
- -

Basically, the debug information allows you to compile a program with - "-O0 -g" and get full debug information, allowing you to arbitrarily - modify the program as it executes from a debugger. Compiling a program with - "-O3 -g" gives you full debug information that is always available - and accurate for reading (e.g., you get accurate stack traces despite tail - call elimination and inlining), but you might lose the ability to modify the - program and call functions which were optimized out of the program, or - inlined away completely.

- -

LLVM test suite provides a - framework to test optimizer's handling of debugging information. It can be - run like this:

- -
-
-% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
-% make TEST=dbgopt
-
-
- -

This will test impact of debugging information on optimization passes. If - debugging information influences optimization passes then it will be reported - as a failure. See TestingGuide for more - information on LLVM test infrastructure and how to run various tests.

- -
- -
- - -

- Debugging information format -

- - -
- -

LLVM debugging information has been carefully designed to make it possible - for the optimizer to optimize the program and debugging information without - necessarily having to know anything about debugging information. In - particular, the use of metadata avoids duplicated debugging information from - the beginning, and the global dead code elimination pass automatically - deletes debugging information for a function if it decides to delete the - function.

- -

To do this, most of the debugging information (descriptors for types, - variables, functions, source files, etc) is inserted by the language - front-end in the form of LLVM metadata.

- -

Debug information is designed to be agnostic about the target debugger and - debugging information representation (e.g. DWARF/Stabs/etc). It uses a - generic pass to decode the information that represents variables, types, - functions, namespaces, etc: this allows for arbitrary source-language - semantics and type-systems to be used, as long as there is a module - written for the target debugger to interpret the information.

- -

To provide basic functionality, the LLVM debugger does have to make some - assumptions about the source-level language being debugged, though it keeps - these to a minimum. The only common features that the LLVM debugger assumes - exist are source files, - and program objects. These abstract - objects are used by a debugger to form stack traces, show information about - local variables, etc.

- -

This section of the documentation first describes the representation aspects - common to any source-language. The next section - describes the data layout conventions used by the C and C++ front-ends.

- - -

- Debug information descriptors -

- -
- -

In consideration of the complexity and volume of debug information, LLVM - provides a specification for well formed debug descriptors.

- -

Consumers of LLVM debug information expect the descriptors for program - objects to start in a canonical format, but the descriptors can include - additional information appended at the end that is source-language - specific. All LLVM debugging information is versioned, allowing backwards - compatibility in the case that the core structures need to change in some - way. Also, all debugging information objects start with a tag to indicate - what type of object it is. The source-language is allowed to define its own - objects, by using unreserved tag numbers. We recommend using tags in - the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base = - 0x1000.)

- -

The fields of debug descriptors used internally by LLVM - are restricted to only the simple data types i32, i1, - float, double, mdstring and mdnode.

- -
-
-!1 = metadata !{
-  i32,   ;; A tag
-  ...
-}
-
-
- -

The first field of a descriptor is always an - i32 containing a tag value identifying the content of the - descriptor. The remaining fields are specific to the descriptor. The values - of tags are loosely bound to the tag values of DWARF information entries. - However, that does not restrict the use of the information supplied to DWARF - targets. To facilitate versioning of debug information, the tag is augmented - with the current debug version (LLVMDebugVersion = 8 << 16 or - 0x80000 or 524288.)

- -

The details of the various descriptors follow.

- - -

- Compile unit descriptors -

- -
- -
-
-!0 = metadata !{
-  i32,       ;; Tag = 17 + LLVMDebugVersion
-             ;; (DW_TAG_compile_unit)
-  i32,       ;; Unused field.
-  i32,       ;; DWARF language identifier (ex. DW_LANG_C89)
-  metadata,  ;; Source file name
-  metadata,  ;; Source file directory (includes trailing slash)
-  metadata,  ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
-  i1,        ;; True if this is a main compile unit.
-  i1,        ;; True if this is optimized.
-  metadata,  ;; Flags
-  i32,       ;; Runtime version
-  metadata,  ;; List of enum types
-  metadata,  ;; List of retained types
-  metadata,  ;; List of subprograms
-  metadata   ;; List of global variables
-}
-
-
- -

These descriptors contain a source language ID for the file (we use the DWARF - 3.0 ID numbers, such as DW_LANG_C89, DW_LANG_C_plus_plus, - DW_LANG_Cobol74, etc), three strings describing the filename, - working directory of the compiler, and an identifier string for the compiler - that produced it.

- -

Compile unit descriptors provide the root context for objects declared in a - specific compilation unit. File descriptors are defined using this context. - These descriptors are collected by a named metadata - !llvm.dbg.cu. Compile unit descriptor keeps track of subprograms, - global variables and type information. - -

- - -

- File descriptors -

- -
- -
-
-!0 = metadata !{
-  i32,       ;; Tag = 41 + LLVMDebugVersion
-             ;; (DW_TAG_file_type)
-  metadata,  ;; Source file name
-  metadata,  ;; Source file directory (includes trailing slash)
-  metadata   ;; Unused
-}
-
-
- -

These descriptors contain information for a file. Global variables and top - level functions would be defined using this context. File descriptors also - provide context for source line correspondence.

- -

Each input file is encoded as a separate file descriptor in LLVM debugging - information output.

- -
- - -

- Global variable descriptors -

- -
- -
-
-!1 = metadata !{
-  i32,      ;; Tag = 52 + LLVMDebugVersion
-            ;; (DW_TAG_variable)
-  i32,      ;; Unused field.
-  metadata, ;; Reference to context descriptor
-  metadata, ;; Name
-  metadata, ;; Display name (fully qualified C++ name)
-  metadata, ;; MIPS linkage name (for C++)
-  metadata, ;; Reference to file where defined
-  i32,      ;; Line number where defined
-  metadata, ;; Reference to type descriptor
-  i1,       ;; True if the global is local to compile unit (static)
-  i1,       ;; True if the global is defined in the compile unit (not extern)
-  {}*       ;; Reference to the global variable
-}
-
-
- -

These descriptors provide debug information about global variables. They -provide details such as name, type and where the variable is defined. All -global variables are collected inside the named metadata -!llvm.dbg.cu.

- -
- - -

- Subprogram descriptors -

- -
- -
-
-!2 = metadata !{
-  i32,      ;; Tag = 46 + LLVMDebugVersion
-            ;; (DW_TAG_subprogram)
-  i32,      ;; Unused field.
-  metadata, ;; Reference to context descriptor
-  metadata, ;; Name
-  metadata, ;; Display name (fully qualified C++ name)
-  metadata, ;; MIPS linkage name (for C++)
-  metadata, ;; Reference to file where defined
-  i32,      ;; Line number where defined
-  metadata, ;; Reference to type descriptor
-  i1,       ;; True if the global is local to compile unit (static)
-  i1,       ;; True if the global is defined in the compile unit (not extern)
-  i32,      ;; Line number where the scope of the subprogram begins
-  i32,      ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
-  i32,      ;; Index into a virtual function
-  metadata, ;; indicates which base type contains the vtable pointer for the
-            ;; derived class
-  i32,      ;; Flags - Artificial, Private, Protected, Explicit, Prototyped.
-  i1,       ;; isOptimized
-  Function *,;; Pointer to LLVM function
-  metadata, ;; Lists function template parameters
-  metadata, ;; Function declaration descriptor
-  metadata  ;; List of function variables
-}
-
-
- -

These descriptors provide debug information about functions, methods and - subprograms. They provide details such as name, return types and the source - location where the subprogram is defined. -

- -
- - -

- Block descriptors -

- -
- -
-
-!3 = metadata !{
-  i32,     ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block)
-  metadata,;; Reference to context descriptor
-  i32,     ;; Line number
-  i32,     ;; Column number
-  metadata,;; Reference to source file
-  i32      ;; Unique ID to identify blocks from a template function
-}
-
-
- -

This descriptor provides debug information about nested blocks within a - subprogram. The line number and column numbers are used to distinguish - two lexical blocks at the same depth.

- -
-
-!3 = metadata !{
-  i32,     ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block)
-  metadata,;; Reference to the scope we're annotating with a file change
-  metadata ;; Reference to the file the scope is enclosed in.
-}
-
-
- -

This descriptor provides a wrapper around a lexical scope to handle file - changes in the middle of a lexical block.

- -
- - -

- Basic type descriptors -

- -
- -
-
-!4 = metadata !{
-  i32,      ;; Tag = 36 + LLVMDebugVersion
-            ;; (DW_TAG_base_type)
-  metadata, ;; Reference to context
-  metadata, ;; Name (may be "" for anonymous types)
-  metadata, ;; Reference to file where defined (may be NULL)
-  i32,      ;; Line number where defined (may be 0)
-  i64,      ;; Size in bits
-  i64,      ;; Alignment in bits
-  i64,      ;; Offset in bits
-  i32,      ;; Flags
-  i32       ;; DWARF type encoding
-}
-
-
- -

These descriptors define primitive types used in the code. Examples are int, bool - and float. The context provides the scope of the type, which is usually the - top level. Since basic types are not usually user defined the context - and line number can be left as NULL and 0. The size, alignment and offset - are expressed in bits and can be 64 bit values. The alignment is used to - round the offset when embedded in a - composite type (for example, to keep float - doubles on 64 bit boundaries.) The offset is the bit offset if embedded in - a composite type.

- -

The type encoding provides the details of the type. The values are typically - one of the following:

- -
-
-DW_ATE_address       = 1
-DW_ATE_boolean       = 2
-DW_ATE_float         = 4
-DW_ATE_signed        = 5
-DW_ATE_signed_char   = 6
-DW_ATE_unsigned      = 7
-DW_ATE_unsigned_char = 8
-
-
- -
- - -

- Derived type descriptors -

- -
- -
-
-!5 = metadata !{
-  i32,      ;; Tag (see below)
-  metadata, ;; Reference to context
-  metadata, ;; Name (may be "" for anonymous types)
-  metadata, ;; Reference to file where defined (may be NULL)
-  i32,      ;; Line number where defined (may be 0)
-  i64,      ;; Size in bits
-  i64,      ;; Alignment in bits
-  i64,      ;; Offset in bits
-  i32,      ;; Flags to encode attributes, e.g. private
-  metadata, ;; Reference to type derived from
-  metadata, ;; (optional) Name of the Objective C property associated with
-            ;; an Objective-C ivar
-  metadata, ;; (optional) Name of the Objective C property getter selector.
-  metadata, ;; (optional) Name of the Objective C property setter selector.
-  i32       ;; (optional) Objective C property attributes.
-}
-
-
- -

These descriptors are used to define types derived from other types. The -value of the tag varies depending on the meaning. The following are possible -tag values:

- -
-
-DW_TAG_formal_parameter = 5
-DW_TAG_member           = 13
-DW_TAG_pointer_type     = 15
-DW_TAG_reference_type   = 16
-DW_TAG_typedef          = 22
-DW_TAG_const_type       = 38
-DW_TAG_volatile_type    = 53
-DW_TAG_restrict_type    = 55
-
-
- -

DW_TAG_member is used to define a member of - a composite type - or subprogram. The type of the member is - the derived - type. DW_TAG_formal_parameter is used to define a member which - is a formal argument of a subprogram.

- -

DW_TAG_typedef is used to provide a name for the derived type.

- -

DW_TAG_pointer_type, DW_TAG_reference_type, - DW_TAG_const_type, DW_TAG_volatile_type and - DW_TAG_restrict_type are used to qualify - the derived type.

- -

Derived type location can be determined - from the context and line number. The size, alignment and offset are - expressed in bits and can be 64 bit values. The alignment is used to round - the offset when embedded in a composite - type (example to keep float doubles on 64 bit boundaries.) The offset is - the bit offset if embedded in a composite - type.

- -

Note that the void * type is expressed as a type derived from NULL. -

- -
- - -

- Composite type descriptors -

- -
- -
-
-!6 = metadata !{
-  i32,      ;; Tag (see below)
-  metadata, ;; Reference to context
-  metadata, ;; Name (may be "" for anonymous types)
-  metadata, ;; Reference to file where defined (may be NULL)
-  i32,      ;; Line number where defined (may be 0)
-  i64,      ;; Size in bits
-  i64,      ;; Alignment in bits
-  i64,      ;; Offset in bits
-  i32,      ;; Flags
-  metadata, ;; Reference to type derived from
-  metadata, ;; Reference to array of member descriptors
-  i32       ;; Runtime languages
-}
-
-
- -

These descriptors are used to define types that are composed of 0 or more -elements. The value of the tag varies depending on the meaning. The following -are possible tag values:

- -
-
-DW_TAG_array_type       = 1
-DW_TAG_enumeration_type = 4
-DW_TAG_structure_type   = 19
-DW_TAG_union_type       = 23
-DW_TAG_vector_type      = 259
-DW_TAG_subroutine_type  = 21
-DW_TAG_inheritance      = 28
-
-
- -

The vector flag indicates that an array type is a native packed vector.

- -

The members of array types (tag = DW_TAG_array_type) or vector types - (tag = DW_TAG_vector_type) are subrange - descriptors, each representing the range of subscripts at that level of - indexing.

- -

The members of enumeration types (tag = DW_TAG_enumeration_type) are - enumerator descriptors, each representing - the definition of enumeration value for the set. All enumeration type - descriptors are collected inside the named metadata - !llvm.dbg.cu.

- -

The members of structure (tag = DW_TAG_structure_type) or union (tag - = DW_TAG_union_type) types are any one of - the basic, - derived - or composite type descriptors, each - representing a field member of the structure or union.

- -

For C++ classes (tag = DW_TAG_structure_type), member descriptors - provide information about base classes, static members and member - functions. If a member is a derived type - descriptor and has a tag of DW_TAG_inheritance, then the type - represents a base class. If the member is - a global variable descriptor then it - represents a static member. And, if the member is - a subprogram descriptor then it represents - a member function. For static members and member - functions, getName() returns the member's linkage name or the C++ mangled - name. getDisplayName() returns the simplified version of the name.

- -

The first member of subroutine (tag = DW_TAG_subroutine_type) type - elements is the return type for the subroutine. The remaining elements are - the formal arguments to the subroutine.

- -

Composite type location can be - determined from the context and line number. The size, alignment and - offset are expressed in bits and can be 64 bit values. The alignment is used - to round the offset when embedded in - a composite type (as an example, to keep - float doubles on 64 bit boundaries.) The offset is the bit offset if embedded - in a composite type.

- -
- - -

- Subrange descriptors -

- -
- -
-
-!42 = metadata !{
-  i32,    ;; Tag = 33 + LLVMDebugVersion (DW_TAG_subrange_type)
-  i64,    ;; Low value
-  i64     ;; High value
-}
-
-
- -

These descriptors are used to define ranges of array subscripts for an array - composite type. The low value defines - the lower bounds typically zero for C/C++. The high value is the upper - bounds. Values are 64 bit. High - low + 1 is the size of the array. If low - > high the array bounds are not included in generated debugging information. -

- -
- - -

- Enumerator descriptors -

- -
- -
-
-!6 = metadata !{
-  i32,      ;; Tag = 40 + LLVMDebugVersion
-            ;; (DW_TAG_enumerator)
-  metadata, ;; Name
-  i64       ;; Value
-}
-
-
- -

These descriptors are used to define members of an - enumeration composite type; each one - associates the name with the value.

- -
- - -

- Local variables -

- -
- -
-
-!7 = metadata !{
-  i32,      ;; Tag (see below)
-  metadata, ;; Context
-  metadata, ;; Name
-  metadata, ;; Reference to file where defined
-  i32,      ;; 24 bit - Line number where defined
-            ;; 8 bit - Argument number. 1 indicates 1st argument.
-  metadata, ;; Type descriptor
-  i32,      ;; flags
-  metadata  ;; (optional) Reference to inline location
-}
-
-
- -

These descriptors are used to define variables local to a sub program. The - value of the tag depends on the usage of the variable:

- -
-
-DW_TAG_auto_variable   = 256
-DW_TAG_arg_variable    = 257
-DW_TAG_return_variable = 258
-
-
- -

An auto variable is any variable declared in the body of the function. An - argument variable is any variable that appears as a formal argument to the - function. A return variable is used to track the result of a function and - has no source correspondent.

- -

The context is either the subprogram or block where the variable is defined. - Name is the source variable name. Context and line indicate where the - variable was defined. Type descriptor defines the declared type of the - variable.

- -
- -
- - -

- Debugger intrinsic functions -

- -
- -

LLVM uses several intrinsic functions (name prefixed with "llvm.dbg") to - provide debug information at various points in generated code.

- - -

- llvm.dbg.declare -

- -
-
-  void %llvm.dbg.declare(metadata, metadata)
-
- -

This intrinsic provides information about a local element (e.g., variable). The - first argument is metadata holding the alloca for the variable. The - second argument is metadata containing a description of the variable.

-
- - -

- llvm.dbg.value -

- -
-
-  void %llvm.dbg.value(metadata, i64, metadata)
-
- -

This intrinsic provides information when a user source variable is set to a - new value. The first argument is the new value (wrapped as metadata). The - second argument is the offset in the user source variable where the new value - is written. The third argument is metadata containing a description of the - user source variable.

-
- -
- - -

- Object lifetimes and scoping -

- -
-

In many languages, the local variables in functions can have their lifetimes - or scopes limited to a subset of a function. In the C family of languages, - for example, variables are only live (readable and writable) within the - source block that they are defined in. In functional languages, values are - only readable after they have been defined. Though this is a very obvious - concept, it is non-trivial to model in LLVM, because it has no notion of - scoping in this sense, and does not want to be tied to a language's scoping - rules.

- -

In order to handle this, the LLVM debug format uses the metadata attached to - llvm instructions to encode line number and scoping information. Consider - the following C fragment, for example:

- -
-
-1.  void foo() {
-2.    int X = 21;
-3.    int Y = 22;
-4.    {
-5.      int Z = 23;
-6.      Z = X;
-7.    }
-8.    X = Y;
-9.  }
-
-
- -

Compiled to LLVM, this function would be represented like this:

- -
-
-define void @foo() nounwind ssp {
-entry:
-  %X = alloca i32, align 4                        ; <i32*> [#uses=4]
-  %Y = alloca i32, align 4                        ; <i32*> [#uses=4]
-  %Z = alloca i32, align 4                        ; <i32*> [#uses=3]
-  %0 = bitcast i32* %X to {}*                     ; <{}*> [#uses=1]
-  call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
-  store i32 21, i32* %X, !dbg !8
-  %1 = bitcast i32* %Y to {}*                     ; <{}*> [#uses=1]
-  call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10
-  store i32 22, i32* %Y, !dbg !11
-  %2 = bitcast i32* %Z to {}*                     ; <{}*> [#uses=1]
-  call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
-  store i32 23, i32* %Z, !dbg !15
-  %tmp = load i32* %X, !dbg !16                   ; <i32> [#uses=1]
-  %tmp1 = load i32* %Y, !dbg !16                  ; <i32> [#uses=1]
-  %add = add nsw i32 %tmp, %tmp1, !dbg !16        ; <i32> [#uses=1]
-  store i32 %add, i32* %Z, !dbg !16
-  %tmp2 = load i32* %Y, !dbg !17                  ; <i32> [#uses=1]
-  store i32 %tmp2, i32* %X, !dbg !17
-  ret void, !dbg !18
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!0 = metadata !{i32 459008, metadata !1, metadata !"X",
-                metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
-               metadata !"foo", metadata !3, i32 1, metadata !4,
-               i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
-                metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
-                i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
-                i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
-!5 = metadata !{null}
-!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
-                i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
-!7 = metadata !{i32 2, i32 7, metadata !1, null}
-!8 = metadata !{i32 2, i32 3, metadata !1, null}
-!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
-                metadata !6}; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 3, i32 7, metadata !1, null}
-!11 = metadata !{i32 3, i32 3, metadata !1, null}
-!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
-                 metadata !6}; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
-!14 = metadata !{i32 5, i32 9, metadata !13, null}
-!15 = metadata !{i32 5, i32 5, metadata !13, null}
-!16 = metadata !{i32 6, i32 5, metadata !13, null}
-!17 = metadata !{i32 8, i32 3, metadata !1, null}
-!18 = metadata !{i32 9, i32 1, metadata !2, null}
-
-
- -

This example illustrates a few important details about LLVM debugging - information. In particular, it shows how the llvm.dbg.declare - intrinsic and location information, which are attached to an instruction, - are applied together to allow a debugger to analyze the relationship between - statements, variable definitions, and the code used to implement the - function.

- -
-
-call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
-
-
- -

The first intrinsic - %llvm.dbg.declare - encodes debugging information for the variable X. The metadata - !dbg !7 attached to the intrinsic provides scope information for the - variable X.

- -
-
-!7 = metadata !{i32 2, i32 7, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
-                metadata !"foo", metadata !"foo", metadata !3, i32 1,
-                metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
-
-
- -

Here !7 is metadata providing location information. It has four - fields: line number, column number, scope, and original scope. The original - scope represents inline location if this instruction is inlined inside a - caller, and is null otherwise. In this example, scope is encoded by - !1. !1 represents a lexical block inside the scope - !2, where !2 is a - subprogram descriptor. This way the - location information attached to the intrinsics indicates that the - variable X is declared at line number 2 at a function level scope in - function foo.

- -

Now let's take another example.

- -
-
-call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
-
-
- -

The second intrinsic - %llvm.dbg.declare - encodes debugging information for variable Z. The metadata - !dbg !14 attached to the intrinsic provides scope information for - the variable Z.

- -
-
-!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
-!14 = metadata !{i32 5, i32 9, metadata !13, null}
-
-
- -

Here !14 indicates that Z is declared at line number 5 and - column number 9 inside of lexical scope !13. The lexical scope - itself resides inside of lexical scope !1 described above.

- -

The scope information attached with each instruction provides a - straightforward way to find instructions covered by a scope.

- -
- -
- - -

- C/C++ front-end specific debug information -

- - -
- -

The C and C++ front-ends represent information about the program in a format - that is effectively identical - to DWARF 3.0 in - terms of information content. This allows code generators to trivially - support native debuggers by generating standard dwarf information, and - contains enough information for non-dwarf targets to translate it as - needed.

- -

This section describes the forms used to represent C and C++ programs. Other - languages could pattern themselves after this (which itself is tuned to - representing programs in the same way that DWARF 3 does), or they could - choose to provide completely different forms if they don't fit into the DWARF - model. As support for debugging information gets added to the various LLVM - source-language front-ends, the information used should be documented - here.

- -

The following sections provide examples of various C/C++ constructs and the - debug information that would best describe those constructs.

- - -

- C/C++ source file information -

- -
- -

Given the source files MySource.cpp and MyHeader.h located - in the directory /Users/mine/sources, the following code:

- -
-
-#include "MyHeader.h"
-
-int main(int argc, char *argv[]) {
-  return 0;
-}
-
-
- -

a C/C++ front-end would generate the following descriptors:

- -
-
-...
-;;
-;; Define the compile unit for the main source file "/Users/mine/sources/MySource.cpp".
-;;
-!2 = metadata !{
-  i32 524305,    ;; Tag
-  i32 0,         ;; Unused
-  i32 4,         ;; Language Id
-  metadata !"MySource.cpp",
-  metadata !"/Users/mine/sources",
-  metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
-  i1 true,       ;; Main Compile Unit
-  i1 false,      ;; Optimized compile unit
-  metadata !"",  ;; Compiler flags
-  i32 0}         ;; Runtime version
-
-;;
-;; Define the file for the file "/Users/mine/sources/MySource.cpp".
-;;
-!1 = metadata !{
-  i32 524329,    ;; Tag
-  metadata !"MySource.cpp",
-  metadata !"/Users/mine/sources",
-  metadata !2    ;; Compile unit
-}
-
-;;
-;; Define the file for the file "/Users/mine/sources/MyHeader.h"
-;;
-!3 = metadata !{
-  i32 524329,    ;; Tag
-  metadata !"MyHeader.h",
-  metadata !"/Users/mine/sources",
-  metadata !2    ;; Compile unit
-}
-
-...
-
-
- -

llvm::Instruction provides easy access to metadata attached with an -instruction. One can extract line number information encoded in LLVM IR -using Instruction::getMetadata() and -DILocation::getLineNumber(). -

- if (MDNode *N = I->getMetadata("dbg")) {  // Here I is an LLVM instruction
-   DILocation Loc(N);                      // DILocation is in DebugInfo.h
-   unsigned Line = Loc.getLineNumber();
-   StringRef File = Loc.getFilename();
-   StringRef Dir = Loc.getDirectory();
- }
-
-
- - -

- C/C++ global variable information -

- -
- -

Given an integer global variable declared as follows:

- -
-
-int MyGlobal = 100;
-
-
- -

a C/C++ front-end would generate the following descriptors:

- -
-
-;;
-;; Define the global itself.
-;;
-@MyGlobal = global i32 100
-...
-;;
-;; List of debug info of globals
-;;
-!llvm.dbg.cu = !{!0}
-
-;; Define the compile unit.
-!0 = metadata !{
-  i32 786449,                       ;; Tag
-  i32 0,                            ;; Context
-  i32 4,                            ;; Language
-  metadata !"foo.cpp",              ;; File
-  metadata !"/Volumes/Data/tmp",    ;; Directory
-  metadata !"clang version 3.1 ",   ;; Producer
-  i1 true,                          ;; Deprecated field
-  i1 false,                         ;; "isOptimized"?
-  metadata !"",                     ;; Flags
-  i32 0,                            ;; Runtime Version
-  metadata !1,                      ;; Enum Types
-  metadata !1,                      ;; Retained Types
-  metadata !1,                      ;; Subprograms
-  metadata !3                       ;; Global Variables
-} ; [ DW_TAG_compile_unit ]
-
-;; The Array of Global Variables
-!3 = metadata !{
-  metadata !4
-}
-
-!4 = metadata !{
-  metadata !5
-}
-
-;;
-;; Define the global variable itself.
-;;
-!5 = metadata !{
-  i32 786484,                        ;; Tag
-  i32 0,                             ;; Unused
-  null,                              ;; Unused
-  metadata !"MyGlobal",              ;; Name
-  metadata !"MyGlobal",              ;; Display Name
-  metadata !"",                      ;; Linkage Name
-  metadata !6,                       ;; File
-  i32 1,                             ;; Line
-  metadata !7,                       ;; Type
-  i32 0,                             ;; IsLocalToUnit
-  i32 1,                             ;; IsDefinition
-  i32* @MyGlobal                     ;; LLVM-IR Value
-} ; [ DW_TAG_variable ]
-
-;;
-;; Define the file
-;;
-!6 = metadata !{
-  i32 786473,                        ;; Tag
-  metadata !"foo.cpp",               ;; File
-  metadata !"/Volumes/Data/tmp",     ;; Directory
-  null                               ;; Unused
-} ; [ DW_TAG_file_type ]
-
-;;
-;; Define the type
-;;
-!7 = metadata !{
-  i32 786468,                         ;; Tag
-  null,                               ;; Unused
-  metadata !"int",                    ;; Name
-  null,                               ;; Unused
-  i32 0,                              ;; Line
-  i64 32,                             ;; Size in Bits
-  i64 32,                             ;; Align in Bits
-  i64 0,                              ;; Offset
-  i32 0,                              ;; Flags
-  i32 5                               ;; Encoding
-} ; [ DW_TAG_base_type ]
-
-
-
- -
- - -

- C/C++ function information -

- -
- -

Given a function declared as follows:

- -
-
-int main(int argc, char *argv[]) {
-  return 0;
-}
-
-
- -

a C/C++ front-end would generate the following descriptors:

- -
-
-;;
-;; Define the anchor for subprograms.  Note that the second field of the
-;; anchor is 46, which is the same as the tag for subprograms
-;; (46 = DW_TAG_subprogram.)
-;;
-!6 = metadata !{
-  i32 524334,        ;; Tag
-  i32 0,             ;; Unused
-  metadata !1,       ;; Context
-  metadata !"main",  ;; Name
-  metadata !"main",  ;; Display name
-  metadata !"main",  ;; Linkage name
-  metadata !1,       ;; File
-  i32 1,             ;; Line number
-  metadata !4,       ;; Type
-  i1 false,          ;; Is local
-  i1 true,           ;; Is definition
-  i32 0,             ;; Virtuality attribute, e.g. pure virtual function
-  i32 0,             ;; Index into virtual table for C++ methods
-  i32 0,             ;; Type that holds virtual table.
-  i32 0,             ;; Flags
-  i1 false,          ;; True if this function is optimized
-  Function *,        ;; Pointer to llvm::Function
-  null               ;; Function template parameters
-}
-;;
-;; Define the subprogram itself.
-;;
-define i32 @main(i32 %argc, i8** %argv) {
-...
-}
-
-
- -
- - -

- C/C++ basic types -

- -
- -

The following are the basic type descriptors for C/C++ core types:

- - -

- bool -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"bool",  ;; Name
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 8,             ;; Size in Bits
-  i64 8,             ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 2              ;; Encoding
-}
-
-
- -
- - -

- char -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"char",  ;; Name
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 8,             ;; Size in Bits
-  i64 8,             ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 6              ;; Encoding
-}
-
-
- -
- - -

- unsigned char -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"unsigned char",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 8,             ;; Size in Bits
-  i64 8,             ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 8              ;; Encoding
-}
-
-
- -
- - -

- short -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"short int",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 16,            ;; Size in Bits
-  i64 16,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 5              ;; Encoding
-}
-
-
- -
- - -

- unsigned short -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"short unsigned int",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 16,            ;; Size in Bits
-  i64 16,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 7              ;; Encoding
-}
-
-
- -
- - -

- int -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"int",   ;; Name
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 32,            ;; Size in Bits
-  i64 32,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 5              ;; Encoding
-}
-
- -
- - -

- unsigned int -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"unsigned int",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 32,            ;; Size in Bits
-  i64 32,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 7              ;; Encoding
-}
-
-
- -
- - -

- long long -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"long long int",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 64,            ;; Size in Bits
-  i64 64,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 5              ;; Encoding
-}
-
-
- -
- - -

- unsigned long long -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"long long unsigned int",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 64,            ;; Size in Bits
-  i64 64,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 7              ;; Encoding
-}
-
-
- -
- - -

- float -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"float",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 32,            ;; Size in Bits
-  i64 32,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 4              ;; Encoding
-}
-
-
- -
- - -

- double -

- -
- -
-
-!2 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"double",;; Name
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 64,            ;; Size in Bits
-  i64 64,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 4              ;; Encoding
-}
-
-
- -
- -
- - -

- C/C++ derived types -

- -
- -

Given the following as an example of C/C++ derived type:

- -
-
-typedef const int *IntPtr;
-
-
- -

a C/C++ front-end would generate the following descriptors:

- -
-
-;;
-;; Define the typedef "IntPtr".
-;;
-!2 = metadata !{
-  i32 524310,          ;; Tag
-  metadata !1,         ;; Context
-  metadata !"IntPtr",  ;; Name
-  metadata !3,         ;; File
-  i32 0,               ;; Line number
-  i64 0,               ;; Size in bits
-  i64 0,               ;; Align in bits
-  i64 0,               ;; Offset in bits
-  i32 0,               ;; Flags
-  metadata !4          ;; Derived From type
-}
-
-;;
-;; Define the pointer type.
-;;
-!4 = metadata !{
-  i32 524303,          ;; Tag
-  metadata !1,         ;; Context
-  metadata !"",        ;; Name
-  metadata !1,         ;; File
-  i32 0,               ;; Line number
-  i64 64,              ;; Size in bits
-  i64 64,              ;; Align in bits
-  i64 0,               ;; Offset in bits
-  i32 0,               ;; Flags
-  metadata !5          ;; Derived From type
-}
-;;
-;; Define the const type.
-;;
-!5 = metadata !{
-  i32 524326,          ;; Tag
-  metadata !1,         ;; Context
-  metadata !"",        ;; Name
-  metadata !1,         ;; File
-  i32 0,               ;; Line number
-  i64 32,              ;; Size in bits
-  i64 32,              ;; Align in bits
-  i64 0,               ;; Offset in bits
-  i32 0,               ;; Flags
-  metadata !6          ;; Derived From type
-}
-;;
-;; Define the int type.
-;;
-!6 = metadata !{
-  i32 524324,          ;; Tag
-  metadata !1,         ;; Context
-  metadata !"int",     ;; Name
-  metadata !1,         ;; File
-  i32 0,               ;; Line number
-  i64 32,              ;; Size in bits
-  i64 32,              ;; Align in bits
-  i64 0,               ;; Offset in bits
-  i32 0,               ;; Flags
-  i32 5                ;; Encoding
-}
-
-
- -
- - -

- C/C++ struct/union types -

- -
- -

Given the following as an example of C/C++ struct type:

- -
-
-struct Color {
-  unsigned Red;
-  unsigned Green;
-  unsigned Blue;
-};
-
-
- -

a C/C++ front-end would generate the following descriptors:

- -
-
-;;
-;; Define basic type for unsigned int.
-;;
-!5 = metadata !{
-  i32 524324,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"unsigned int",
-  metadata !1,       ;; File
-  i32 0,             ;; Line number
-  i64 32,            ;; Size in Bits
-  i64 32,            ;; Align in Bits
-  i64 0,             ;; Offset in Bits
-  i32 0,             ;; Flags
-  i32 7              ;; Encoding
-}
-;;
-;; Define composite type for struct Color.
-;;
-!2 = metadata !{
-  i32 524307,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"Color", ;; Name
-  metadata !1,       ;; Compile unit
-  i32 1,             ;; Line number
-  i64 96,            ;; Size in bits
-  i64 32,            ;; Align in bits
-  i64 0,             ;; Offset in bits
-  i32 0,             ;; Flags
-  null,              ;; Derived From
-  metadata !3,       ;; Elements
-  i32 0              ;; Runtime Language
-}
-
-;;
-;; Define the Red field.
-;;
-!4 = metadata !{
-  i32 524301,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"Red",   ;; Name
-  metadata !1,       ;; File
-  i32 2,             ;; Line number
-  i64 32,            ;; Size in bits
-  i64 32,            ;; Align in bits
-  i64 0,             ;; Offset in bits
-  i32 0,             ;; Flags
-  metadata !5        ;; Derived From type
-}
-
-;;
-;; Define the Green field.
-;;
-!6 = metadata !{
-  i32 524301,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"Green", ;; Name
-  metadata !1,       ;; File
-  i32 3,             ;; Line number
-  i64 32,            ;; Size in bits
-  i64 32,            ;; Align in bits
-  i64 32,             ;; Offset in bits
-  i32 0,             ;; Flags
-  metadata !5        ;; Derived From type
-}
-
-;;
-;; Define the Blue field.
-;;
-!7 = metadata !{
-  i32 524301,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"Blue",  ;; Name
-  metadata !1,       ;; File
-  i32 4,             ;; Line number
-  i64 32,            ;; Size in bits
-  i64 32,            ;; Align in bits
-  i64 64,             ;; Offset in bits
-  i32 0,             ;; Flags
-  metadata !5        ;; Derived From type
-}
-
-;;
-;; Define the array of fields used by the composite type Color.
-;;
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-
-
- -
- - -

- C/C++ enumeration types -

- -
- -

Given the following as an example of C/C++ enumeration type:

- -
-
-enum Trees {
-  Spruce = 100,
-  Oak = 200,
-  Maple = 300
-};
-
-
- -

a C/C++ front-end would generate the following descriptors:

- -
-
-;;
-;; Define composite type for enum Trees
-;;
-!2 = metadata !{
-  i32 524292,        ;; Tag
-  metadata !1,       ;; Context
-  metadata !"Trees", ;; Name
-  metadata !1,       ;; File
-  i32 1,             ;; Line number
-  i64 32,            ;; Size in bits
-  i64 32,            ;; Align in bits
-  i64 0,             ;; Offset in bits
-  i32 0,             ;; Flags
-  null,              ;; Derived From type
-  metadata !3,       ;; Elements
-  i32 0              ;; Runtime language
-}
-
-;;
-;; Define the array of enumerators used by composite type Trees.
-;;
-!3 = metadata !{metadata !4, metadata !5, metadata !6}
-
-;;
-;; Define Spruce enumerator.
-;;
-!4 = metadata !{i32 524328, metadata !"Spruce", i64 100}
-
-;;
-;; Define Oak enumerator.
-;;
-!5 = metadata !{i32 524328, metadata !"Oak", i64 200}
-
-;;
-;; Define Maple enumerator.
-;;
-!6 = metadata !{i32 524328, metadata !"Maple", i64 300}
-
-
-
- -
- -
- - - -

- Debugging information format -

- -
- -

- Debugging Information Extension for Objective C Properties -

-
- -

- Introduction -

- - -
-

Objective C provides a simpler way to declare and define accessor methods -using declared properties. The language provides features to declare a -property and to let the compiler synthesize accessor methods. -

- -

The debugger lets developers inspect Objective C interfaces and their -instance variables and class variables. However, the debugger does not know -anything about the properties defined in Objective C interfaces. The debugger -consumes information generated by the compiler in DWARF format. The format does -not support encoding of Objective C properties. This proposal describes DWARF -extensions to encode Objective C properties, which the debugger can use to let -developers inspect Objective C properties. -

- -
- - - -

- Proposal -

- - -
-

Objective C properties exist separately from class members. A property -can be defined only by "setter" and "getter" selectors, and -be calculated anew on each access. Or a property can just be a direct access -to some declared ivar. Finally it can have an ivar "automatically -synthesized" for it by the compiler, in which case the property can be -referred to in user code directly using the standard C dereference syntax as -well as through the property "dot" syntax, but there is no entry in -the @interface declaration corresponding to this ivar. -

-

-To facilitate debugging these properties, we will add a new DWARF TAG into the -DW_TAG_structure_type definition for the class to hold the description of a -given property, and a set of DWARF attributes that provide said description. -The property tag will also contain the name and declared type of the property. -

-

-If there is a related ivar, there will also be a DWARF property attribute placed -in the DW_TAG_member DIE for that ivar referring back to the property TAG for -that property. And in the case where the compiler synthesizes the ivar directly, -the compiler is expected to generate a DW_TAG_member for that ivar (with the -DW_AT_artificial set to 1), whose name will be the name used to access this -ivar directly in code, and with the property attribute pointing back to the -property it is backing. -

-

-The following examples will serve as illustration for our discussion: -

- -
-
-@interface I1 {
-  int n2;
-}
-
-@property int p1;
-@property int p2;
-@end
-
-@implementation I1
-@synthesize p1;
-@synthesize p2 = n2;
-@end
-
-
- -

-This produces the following DWARF (this is a "pseudo dwarfdump" output): -

-
-
-0x00000100:  TAG_structure_type [7] *
-               AT_APPLE_runtime_class( 0x10 )
-               AT_name( "I1" )
-               AT_decl_file( "Objc_Property.m" )
-               AT_decl_line( 3 )
-
-0x00000110:   TAG_APPLE_property
-                AT_name ( "p1" )
-                AT_type ( {0x00000150} ( int ) )
-
-0x00000120:   TAG_APPLE_property
-                AT_name ( "p2" )
-                AT_type ( {0x00000150} ( int ) )
-
-0x00000130:   TAG_member [8]
-                AT_name( "_p1" )
-                AT_APPLE_property ( {0x00000110} "p1" )
-                AT_type( {0x00000150} ( int ) )
-                AT_artificial ( 0x1 )
-
-0x00000140:    TAG_member [8]
-                 AT_name( "n2" )
-                 AT_APPLE_property ( {0x00000120} "p2" )
-                 AT_type( {0x00000150} ( int ) )
-
-0x00000150:  AT_type( ( int ) )
-
-
- -

Note, the current convention is that the name of the ivar for an -auto-synthesized property is the name of the property from which it derives with -an underscore prepended, as is shown in the example. -But we actually don't need to know this convention, since we are given the name -of the ivar directly. -

- -

-Also, it is common practice in ObjC to have different property declarations in -the @interface and @implementation - e.g. to provide a read-only property in -the interface, and a read-write interface in the implementation. In that case, -the compiler should emit whichever property declaration will be in force in the -current translation unit. -

- -

Developers can decorate a property with attributes which are encoded using -DW_AT_APPLE_property_attribute. -

- -
-
-@property (readonly, nonatomic) int pr;
-
-
-

-Which produces a property tag: -

-

-
-TAG_APPLE_property [8]
-  AT_name( "pr" )
-  AT_type ( {0x00000147} (int) )
-  AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
-
-
- -

The setter and getter method names are attached to the property using -DW_AT_APPLE_property_setter and DW_AT_APPLE_property_getter attributes. -

-
-
-@interface I1
-@property (setter=myOwnP3Setter:) int p3;
--(void)myOwnP3Setter:(int)a;
-@end
-
-@implementation I1
-@synthesize p3;
--(void)myOwnP3Setter:(int)a{ }
-@end
-
-
- -

-The DWARF for this would be: -

-
-
-0x000003bd: TAG_structure_type [7] *
-              AT_APPLE_runtime_class( 0x10 )
-              AT_name( "I1" )
-              AT_decl_file( "Objc_Property.m" )
-              AT_decl_line( 3 )
-
-0x000003cd:     TAG_APPLE_property
-                  AT_name ( "p3" )
-                  AT_APPLE_property_setter ( "myOwnP3Setter:" )
-                  AT_type( {0x00000147} ( int ) )
-
-0x000003f3:     TAG_member [8]
-                  AT_name( "_p3" )
-                  AT_type ( {0x00000147} ( int ) )
-                  AT_APPLE_property ( {0x000003cd} )
-                  AT_artificial ( 0x1 )
-
-
- -
- - -

- New DWARF Tags -

- - -
- - - - - - - - - - - -
TAGValue
DW_TAG_APPLE_property0x4200
- -
- - -

- New DWARF Attributes -

- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
AttributeValueClasses
DW_AT_APPLE_property0x3fedReference
DW_AT_APPLE_property_getter0x3fe9String
DW_AT_APPLE_property_setter0x3feaString
DW_AT_APPLE_property_attribute0x3febConstant
- -
- - -

- New DWARF Constants -

- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameValue
DW_APPLE_PROPERTY_readonly 0x1
DW_APPLE_PROPERTY_readwrite 0x2
DW_APPLE_PROPERTY_assign 0x4
DW_APPLE_PROPERTY_retain 0x8
DW_APPLE_PROPERTY_copy 0x10
DW_APPLE_PROPERTY_nonatomic 0x20
- -
-
- - -

- Name Accelerator Tables -

- -
- -

- Introduction -

- -
-

The .debug_pubnames and .debug_pubtypes formats are not what a debugger - needs. The "pub" in the section name indicates that the entries in the - table are publicly visible names only. This means no static or hidden - functions show up in the .debug_pubnames. No static variables or private class - variables are in the .debug_pubtypes. Many compilers add different things to - these tables, so we can't rely upon the contents between gcc, icc, or clang.

- -

The typical query given by users tends not to match up with the contents of - these tables. For example, the DWARF spec states that "In the case of the - name of a function member or static data member of a C++ structure, class or - union, the name presented in the .debug_pubnames section is not the simple - name given by the DW_AT_name attribute of the referenced debugging information - entry, but rather the fully qualified name of the data or function member." - So the only names in these tables for complex C++ entries is a fully - qualified name. Debugger users tend not to enter their search strings as - "a::b::c(int,const Foo&) const", but rather as "c", "b::c" , or "a::b::c". So - the name entered in the name table must be demangled in order to chop it up - appropriately and additional names must be manually entered into the table - to make it effective as a name lookup table for debuggers to use.

- -

All debuggers currently ignore the .debug_pubnames table as a result of - its inconsistent and useless public-only name content making it a waste of - space in the object file. These tables, when they are written to disk, are - not sorted in any way, leaving every debugger to do its own parsing - and sorting. These tables also include an inlined copy of the string values - in the table itself making the tables much larger than they need to be on - disk, especially for large C++ programs.

- -

Can't we just fix the sections by adding all of the names we need to this - table? No, because that is not what the tables are defined to contain and we - won't know the difference between the old bad tables and the new good tables. - At best we could make our own renamed sections that contain all of the data - we need.

- -

These tables are also insufficient for what a debugger like LLDB needs. - LLDB uses clang for its expression parsing where LLDB acts as a PCH. LLDB is - then often asked to look for type "foo" or namespace "bar", or list items in - namespace "baz". Namespaces are not included in the pubnames or pubtypes - tables. Since clang asks a lot of questions when it is parsing an expression, - we need to be very fast when looking up names, as it happens a lot. Having new - accelerator tables that are optimized for very quick lookups will benefit - this type of debugging experience greatly.

- -

We would like to generate name lookup tables that can be mapped into - memory from disk, and used as is, with little or no up-front parsing. We would - also be able to control the exact content of these different tables so they - contain exactly what we need. The Name Accelerator Tables were designed - to fix these issues. In order to solve these issues we need to:

- -
    -
  • Have a format that can be mapped into memory from disk and used as is
  • -
  • Lookups should be very fast
  • -
  • Extensible table format so these tables can be made by many producers
  • -
  • Contain all of the names needed for typical lookups out of the box
  • -
  • Strict rules for the contents of tables
  • -
- -

Table size is important and the accelerator table format should allow the - reuse of strings from common string tables so the strings for the names are - not duplicated. We also want to make sure the table is ready to be used as-is - by simply mapping the table into memory with minimal header parsing.

- -

The name lookups need to be fast and optimized for the kinds of lookups - that debuggers tend to do. Optimally we would like to touch as few parts of - the mapped table as possible when doing a name lookup and be able to quickly - find the name entry we are looking for, or discover there are no matches. In - the case of debuggers we optimized for lookups that fail most of the time.

- -

Each table that is defined should have strict rules on exactly what is in - the accelerator tables and documented so clients can rely on the content.

- -
- - -

- Hash Tables -

- - -
-
Standard Hash Tables
- -

Typical hash tables have a header, buckets, and each bucket points to the -bucket contents: -

- -
-
-.------------.
-|  HEADER    |
-|------------|
-|  BUCKETS   |
-|------------|
-|  DATA      |
-`------------'
-
-
- -

The BUCKETS are an array of offsets to DATA for each hash:

- -
-
-.------------.
-| 0x00001000 | BUCKETS[0]
-| 0x00002000 | BUCKETS[1]
-| 0x00002200 | BUCKETS[2]
-| 0x000034f0 | BUCKETS[3]
-|            | ...
-| 0xXXXXXXXX | BUCKETS[n_buckets]
-'------------'
-
-
- -

So for bucket[3] in the example above, we have an offset into the table - 0x000034f0 which points to a chain of entries for the bucket. Each bucket - must contain a next pointer, full 32 bit hash value, the string itself, - and the data for the current string value.

- -
-
-            .------------.
-0x000034f0: | 0x00003500 | next pointer
-            | 0x12345678 | 32 bit hash
-            | "erase"    | string value
-            | data[n]    | HashData for this bucket
-            |------------|
-0x00003500: | 0x00003550 | next pointer
-            | 0x29273623 | 32 bit hash
-            | "dump"     | string value
-            | data[n]    | HashData for this bucket
-            |------------|
-0x00003550: | 0x00000000 | next pointer
-            | 0x82638293 | 32 bit hash
-            | "main"     | string value
-            | data[n]    | HashData for this bucket
-            `------------'
-
-
- -

The problem with this layout for debuggers is that we need to optimize for - the negative lookup case where the symbol we're searching for is not present. - So if we were to look up "printf" in the table above, we would make a 32 bit hash - for "printf", it might match bucket[3]. We would need to go to the offset - 0x000034f0 and start looking to see if our 32 bit hash matches. To do so, we - need to read the next pointer, then read the hash, compare it, and skip to - the next bucket. Each time we are skipping many bytes in memory and touching - new cache pages just to do the compare on the full 32 bit hash. All of these - accesses then tell us that we didn't have a match.

- -
Name Hash Tables
- -

To solve the issues mentioned above we have structured the hash tables - a bit differently: a header, buckets, an array of all unique 32 bit hash - values, followed by an array of hash value data offsets, one for each hash - value, then the data for all hash values:

- -
-
-.-------------.
-|  HEADER     |
-|-------------|
-|  BUCKETS    |
-|-------------|
-|  HASHES     |
-|-------------|
-|  OFFSETS    |
-|-------------|
-|  DATA       |
-`-------------'
-
-
- -

The BUCKETS in the name tables are an index into the HASHES array. By - making all of the full 32 bit hash values contiguous in memory, we allow - ourselves to efficiently check for a match while touching as little - memory as possible. Most often checking the 32 bit hash values is as far as - the lookup goes. If it does match, it usually is a match with no collisions. - So for a table with "n_buckets" buckets, and "n_hashes" unique 32 bit hash - values, we can clarify the contents of the BUCKETS, HASHES and OFFSETS as:

- -
-
-.-------------------------.
-|  HEADER.magic           | uint32_t
-|  HEADER.version         | uint16_t
-|  HEADER.hash_function   | uint16_t
-|  HEADER.bucket_count    | uint32_t
-|  HEADER.hashes_count    | uint32_t
-|  HEADER.header_data_len | uint32_t
-|  HEADER_DATA            | HeaderData
-|-------------------------|
-|  BUCKETS                | uint32_t[bucket_count] // 32 bit hash indexes
-|-------------------------|
-|  HASHES                 | uint32_t[hashes_count] // 32 bit hash values
-|-------------------------|
-|  OFFSETS                | uint32_t[hashes_count] // 32 bit offsets to hash value data
-|-------------------------|
-|  ALL HASH DATA          |
-`-------------------------'
-
-
- -

So taking the exact same data from the standard hash example above we end up - with:

- -
-
-            .------------.
-            | HEADER     |
-            |------------|
-            |          0 | BUCKETS[0]
-            |          2 | BUCKETS[1]
-            |          5 | BUCKETS[2]
-            |          6 | BUCKETS[3]
-            |            | ...
-            |        ... | BUCKETS[n_buckets]
-            |------------|
-            | 0x........ | HASHES[0]
-            | 0x........ | HASHES[1]
-            | 0x........ | HASHES[2]
-            | 0x........ | HASHES[3]
-            | 0x........ | HASHES[4]
-            | 0x........ | HASHES[5]
-            | 0x12345678 | HASHES[6]    hash for BUCKETS[3]
-            | 0x29273623 | HASHES[7]    hash for BUCKETS[3]
-            | 0x82638293 | HASHES[8]    hash for BUCKETS[3]
-            | 0x........ | HASHES[9]
-            | 0x........ | HASHES[10]
-            | 0x........ | HASHES[11]
-            | 0x........ | HASHES[12]
-            | 0x........ | HASHES[13]
-            | 0x........ | HASHES[n_hashes]
-            |------------|
-            | 0x........ | OFFSETS[0]
-            | 0x........ | OFFSETS[1]
-            | 0x........ | OFFSETS[2]
-            | 0x........ | OFFSETS[3]
-            | 0x........ | OFFSETS[4]
-            | 0x........ | OFFSETS[5]
-            | 0x000034f0 | OFFSETS[6]   offset for BUCKETS[3]
-            | 0x00003500 | OFFSETS[7]   offset for BUCKETS[3]
-            | 0x00003550 | OFFSETS[8]   offset for BUCKETS[3]
-            | 0x........ | OFFSETS[9]
-            | 0x........ | OFFSETS[10]
-            | 0x........ | OFFSETS[11]
-            | 0x........ | OFFSETS[12]
-            | 0x........ | OFFSETS[13]
-            | 0x........ | OFFSETS[n_hashes]
-            |------------|
-            |            |
-            |            |
-            |            |
-            |            |
-            |            |
-            |------------|
-0x000034f0: | 0x00001203 | .debug_str ("erase")
-            | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
-            | 0x........ | HashData[0]
-            | 0x........ | HashData[1]
-            | 0x........ | HashData[2]
-            | 0x........ | HashData[3]
-            | 0x00000000 | String offset into .debug_str (terminate data for hash)
-            |------------|
-0x00003500: | 0x00001203 | String offset into .debug_str ("collision")
-            | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
-            | 0x........ | HashData[0]
-            | 0x........ | HashData[1]
-            | 0x00001203 | String offset into .debug_str ("dump")
-            | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
-            | 0x........ | HashData[0]
-            | 0x........ | HashData[1]
-            | 0x........ | HashData[2]
-            | 0x00000000 | String offset into .debug_str (terminate data for hash)
-            |------------|
-0x00003550: | 0x00001203 | String offset into .debug_str ("main")
-            | 0x00000009 | A 32 bit array count - number of HashData with name "main"
-            | 0x........ | HashData[0]
-            | 0x........ | HashData[1]
-            | 0x........ | HashData[2]
-            | 0x........ | HashData[3]
-            | 0x........ | HashData[4]
-            | 0x........ | HashData[5]
-            | 0x........ | HashData[6]
-            | 0x........ | HashData[7]
-            | 0x........ | HashData[8]
-            | 0x00000000 | String offset into .debug_str (terminate data for hash)
-            `------------'
-
-
- -

So we still have all of the same data, we just organize it more efficiently - for debugger lookup. If we repeat the same "printf" lookup from above, we - would hash "printf" and find it matches BUCKETS[3] by taking the 32 bit hash - value and modulo it by n_buckets. BUCKETS[3] contains "6" which is the index - into the HASHES table. We would then compare any consecutive 32 bit hash - values in the HASHES array as long as the hashes would be in BUCKETS[3]. We - do this by verifying that each subsequent hash value modulo n_buckets is still - 3. In the case of a failed lookup we would access the memory for BUCKETS[3], and - then compare a few consecutive 32 bit hashes before we know that we have no match. - We don't end up marching through multiple words of memory and we really keep the - number of processor data cache lines being accessed as small as possible.

- -

The string hash that is used for these lookup tables is the Daniel J. - Bernstein hash which is also used in the ELF GNU_HASH sections. It is a very - good hash for all kinds of names in programs with very few hash collisions.

- -

Empty buckets are designated by using an invalid hash index of UINT32_MAX.

-
- - -

- Details -

- -
-

These name hash tables are designed to be generic where specializations of - the table get to define additional data that goes into the header - ("HeaderData"), how the string value is stored ("KeyType") and the content - of the data for each hash value.

- -
Header Layout
-

The header has a fixed part, and the specialized part. The exact format of - the header is:

-
-
-struct Header
-{
-  uint32_t   magic;           // 'HASH' magic value to allow endian detection
-  uint16_t   version;         // Version number
-  uint16_t   hash_function;   // The hash function enumeration that was used
-  uint32_t   bucket_count;    // The number of buckets in this hash table
-  uint32_t   hashes_count;    // The total number of unique hash values and hash data offsets in this table
-  uint32_t   header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
-                              // Specifically the length of the following HeaderData field - this does not
-                              // include the size of the preceding fields
-  HeaderData header_data;     // Implementation specific header data
-};
-
-
-

The header starts with a 32 bit "magic" value which must be 'HASH' encoded as - an ASCII integer. This allows the detection of the start of the hash table and - also allows the table's byte order to be determined so the table can be - correctly extracted. The "magic" value is followed by a 16 bit version number - which allows the table to be revised and modified in the future. The current - version number is 1. "hash_function" is a uint16_t enumeration that specifies - which hash function was used to produce this table. The current values for the - hash function enumerations include:

-
-
-enum HashFunctionType
-{
-  eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
-};
-
-
-

"bucket_count" is a 32 bit unsigned integer that represents how many buckets - are in the BUCKETS array. "hashes_count" is the number of unique 32 bit hash - values that are in the HASHES array, and is the same as the number of offsets - contained in the OFFSETS array. "header_data_len" specifies the size in - bytes of the HeaderData that is filled in by specialized versions of this - table.

- -
Fixed Lookup
-

The header is followed by the buckets, hashes, offsets, and hash value - data. -

-
-struct FixedTable
-{
-  uint32_t buckets[Header.bucket_count];  // An array of hash indexes into the "hashes[]" array below
-  uint32_t hashes [Header.hashes_count];  // Every unique 32 bit hash for the entire table is in this table
-  uint32_t offsets[Header.hashes_count];  // An offset that corresponds to each item in the "hashes[]" array above
-};
-
-
-

"buckets" is an array of 32 bit indexes into the "hashes" array. The - "hashes" array contains all of the 32 bit hash values for all names in the - hash table. Each hash in the "hashes" table has an offset in the "offsets" - array that points to the data for the hash value.

- -

This table setup makes it very easy to repurpose these tables to contain - different data, while keeping the lookup mechanism the same for all tables. - This layout also makes it possible to save the table to disk and map it in - later and do very efficient name lookups with little or no parsing.

- -

DWARF lookup tables can be implemented in a variety of ways and can store - a lot of information for each name. We want to make the DWARF tables - extensible and able to store the data efficiently so we have used some of the - DWARF features that enable efficient data storage to define exactly what kind - of data we store for each name.

- -

The "HeaderData" contains a definition of the contents of each HashData - chunk. We might want to store an offset to all of the debug information - entries (DIEs) for each name. To keep things extensible, we create a list of - items, or Atoms, that are contained in the data for each name. First comes the - type of the data in each atom:

-
-
-enum AtomType
-{
-  eAtomTypeNULL       = 0u,
-  eAtomTypeDIEOffset  = 1u,   // DIE offset, check form for encoding
-  eAtomTypeCUOffset   = 2u,   // DIE offset of the compile unit header that contains the item in question
-  eAtomTypeTag        = 3u,   // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
-  eAtomTypeNameFlags  = 4u,   // Flags from enum NameFlags
-  eAtomTypeTypeFlags  = 5u,   // Flags from enum TypeFlags
-};
-
-
-

The enumeration values and their meanings are:

-
-
-  eAtomTypeNULL       - a termination atom that specifies the end of the atom list
-  eAtomTypeDIEOffset  - an offset into the .debug_info section for the DWARF DIE for this name
-  eAtomTypeCUOffset   - an offset into the .debug_info section for the CU that contains the DIE
-  eAtomTypeTag        - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
-  eAtomTypeNameFlags  - Flags for functions and global variables (isFunction, isInlined, isExternal...)
-  eAtomTypeTypeFlags  - Flags for types (isCXXClass, isObjCClass, ...)
-
-
-

Then we allow each atom to define its atom type and how the data for - that atom type is encoded:

-
-
-struct Atom
-{
-  uint16_t type;  // AtomType enum value
-  uint16_t form;  // DWARF DW_FORM_XXX defines
-};
-
-
-

The "form" type above is from the DWARF specification and defines the - exact encoding of the data for the Atom type. See the DWARF specification for - the DW_FORM_ definitions.

-
-
-struct HeaderData
-{
-  uint32_t die_offset_base;
-  uint32_t atom_count;
-  Atom     atoms[atom_count];
-};
-
-
-

"HeaderData" defines the base DIE offset that should be added to any atoms - that are encoded using the DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4, - DW_FORM_ref8 or DW_FORM_ref_udata. It also defines what is contained in - each "HashData" object -- Atom.form tells us how large each field will be in - the HashData and the Atom.type tells us how this data should be interpreted.

- -

For the current implementations of the ".apple_names" (all functions + globals), - the ".apple_types" (names of all types that are defined), and the - ".apple_namespaces" (all namespaces), we currently set the Atom array to be:

-
-
-HeaderData.atom_count = 1;
-HeaderData.atoms[0].type = eAtomTypeDIEOffset;
-HeaderData.atoms[0].form = DW_FORM_data4;
-
-
-

This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is - encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have - multiple matching DIEs in a single file, which could come up with an inlined - function for instance. Future tables could include more information about the - DIE such as flags indicating if the DIE is a function, method, block, - or inlined.

- -

The KeyType for the DWARF table is a 32 bit string table offset into the - ".debug_str" table. The ".debug_str" is the string table for the DWARF which - may already contain copies of all of the strings. This helps make sure, with - help from the compiler, that we reuse the strings between all of the DWARF - sections and keeps the hash table size down. Another benefit to having the - compiler generate all strings as DW_FORM_strp in the debug info, is that - DWARF parsing can be made much faster.

- -

After a lookup is made, we get an offset into the hash data. The hash data - needs to be able to deal with 32 bit hash collisions, so the chunk of data - at the offset in the hash data consists of a triple:

-
-
-uint32_t str_offset
-uint32_t hash_data_count
-HashData[hash_data_count]
-
-
-

If "str_offset" is zero, then the bucket contents are done. 99.9% of the - hash data chunks contain a single item (no 32 bit hash collision):

-
-
-.------------.
-| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
-| 0x00000004 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x........ | uint32_t HashData[2] DIE offset
-| 0x........ | uint32_t HashData[3] DIE offset
-| 0x00000000 | uint32_t KeyType (end of hash chain)
-`------------'
-
-
-

If there are collisions, you will have multiple valid string offsets:

-
-
-.------------.
-| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
-| 0x00000004 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x........ | uint32_t HashData[2] DIE offset
-| 0x........ | uint32_t HashData[3] DIE offset
-| 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print")
-| 0x00000002 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x00000000 | uint32_t KeyType (end of hash chain)
-`------------'
-
-
-

Current testing with real world C++ binaries has shown that there is around 1 - 32 bit hash collision per 100,000 name entries.

-
- -

- Contents -

- -
-

As we said, we want to strictly define exactly what is included in the - different tables. For DWARF, we have 3 tables: ".apple_names", ".apple_types", - and ".apple_namespaces".

- -

".apple_names" sections should contain an entry for each DWARF DIE whose - DW_TAG is a DW_TAG_label, DW_TAG_inlined_subroutine, or DW_TAG_subprogram that - has address attributes: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges or - DW_AT_entry_pc. It also contains DW_TAG_variable DIEs that have a DW_OP_addr - in the location (global and static variables). All global and static variables - should be included, including those scoped within functions and classes. For - example using the following code:

-
-
-static int var = 0;
-
-void f ()
-{
-  static int var = 0;
-}
-
-
-

Both of the static "var" variables would be included in the table. All - functions should emit both their full names and their basenames. For C or C++, - the full name is the mangled name (if available) which is usually in the - DW_AT_MIPS_linkage_name attribute, and the DW_AT_name contains the function - basename. If global or static variables have a mangled name in a - DW_AT_MIPS_linkage_name attribute, this should be emitted along with the - simple name found in the DW_AT_name attribute.

- -

".apple_types" sections should contain an entry for each DWARF DIE whose - tag is one of:

-
    -
  • DW_TAG_array_type
  • -
  • DW_TAG_class_type
  • -
  • DW_TAG_enumeration_type
  • -
  • DW_TAG_pointer_type
  • -
  • DW_TAG_reference_type
  • -
  • DW_TAG_string_type
  • -
  • DW_TAG_structure_type
  • -
  • DW_TAG_subroutine_type
  • -
  • DW_TAG_typedef
  • -
  • DW_TAG_union_type
  • -
  • DW_TAG_ptr_to_member_type
  • -
  • DW_TAG_set_type
  • -
  • DW_TAG_subrange_type
  • -
  • DW_TAG_base_type
  • -
  • DW_TAG_const_type
  • -
  • DW_TAG_constant
  • -
  • DW_TAG_file_type
  • -
  • DW_TAG_namelist
  • -
  • DW_TAG_packed_type
  • -
  • DW_TAG_volatile_type
  • -
  • DW_TAG_restrict_type
  • -
  • DW_TAG_interface_type
  • -
  • DW_TAG_unspecified_type
  • -
  • DW_TAG_shared_type
  • -
-

Only entries with a DW_AT_name attribute are included, and the entry must - not be a forward declaration (DW_AT_declaration attribute with a non-zero value). - For example, using the following code:

-
-
-int main ()
-{
-  int *b = 0;
-  return *b;
-}
-
-
-

We get a few type DIEs:

-
-
-0x00000067:     TAG_base_type [5]
-                AT_encoding( DW_ATE_signed )
-                AT_name( "int" )
-                AT_byte_size( 0x04 )
-
-0x0000006e:     TAG_pointer_type [6]
-                AT_type( {0x00000067} ( int ) )
-                AT_byte_size( 0x08 )
-
-
-

The DW_TAG_pointer_type is not included because it does not have a DW_AT_name.

- -

".apple_namespaces" section should contain all DW_TAG_namespace DIEs. If - we run into a namespace that has no name this is an anonymous namespace, - and the name should be output as "(anonymous namespace)" (without the quotes). - Why? This matches the output of the abi::cxa_demangle() that is in the standard - C++ library that demangles mangled names.

-
- - -

- Language Extensions and File Format Changes -

- -
-
Objective-C Extensions
-

".apple_objc" section should contain all DW_TAG_subprogram DIEs for an - Objective-C class. The name used in the hash table is the name of the - Objective-C class itself. If the Objective-C class has a category, then an - entry is made for both the class name without the category, and for the class - name with the category. So if we have a DIE at offset 0x1234 with a name - of method "-[NSString(my_additions) stringWithSpecialString:]", we would add - an entry for "NSString" that points to DIE 0x1234, and an entry for - "NSString(my_additions)" that points to 0x1234. This allows us to quickly - track down all Objective-C methods for an Objective-C class when doing - expressions. It is needed because of the dynamic nature of Objective-C where - anyone can add methods to a class. The DWARF for Objective-C methods is also - emitted differently from C++ classes where the methods are not usually - contained in the class definition, they are scattered about across one or more - compile units. Categories can also be defined in different shared libraries. - So we need to be able to quickly find all of the methods and class functions - given the Objective-C class name, or quickly find all methods and class - functions for a class + category name. This table does not contain any selector - names, it just maps Objective-C class names (or class names + category) to all - of the methods and class functions. The selectors are added as function - basenames in the .debug_names section.

- -

In the ".apple_names" section for Objective-C functions, the full name is the - entire function name with the brackets ("-[NSString stringWithCString:]") and the - basename is the selector only ("stringWithCString:").

- -
Mach-O Changes
-

The sections names for the apple hash tables are for non mach-o files. For - mach-o files, the sections should be contained in the "__DWARF" segment with - names as follows:

-
    -
  • ".apple_names" -> "__apple_names"
  • -
  • ".apple_types" -> "__apple_types"
  • -
  • ".apple_namespaces" -> "__apple_namespac" (16 character limit)
  • -
  • ".apple_objc" -> "__apple_objc"
  • -
-
-
-
- - - -
-
- Valid CSS - Valid HTML 4.01 - - Chris Lattner
- LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-09 01:54:10 +0200 (Tue, 09 Oct 2012) $ -
- - - diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst new file mode 100644 index 000000000000..857479508a5e --- /dev/null +++ b/docs/SourceLevelDebugging.rst @@ -0,0 +1,2281 @@ +================================ +Source Level Debugging with LLVM +================================ + +.. contents:: + :local: + +Introduction +============ + +This document is the central repository for all information pertaining to debug +information in LLVM. It describes the :ref:`actual format that the LLVM debug +information takes `, which is useful for those interested in creating +front-ends or dealing directly with the information. Further, this document +provides specific examples of what debug information for C/C++ looks like. + +Philosophy behind LLVM debugging information +-------------------------------------------- + +The idea of the LLVM debugging information is to capture how the important +pieces of the source-language's Abstract Syntax Tree map onto LLVM code. +Several design aspects have shaped the solution that appears here. The +important ones are: + +* Debugging information should have very little impact on the rest of the + compiler. No transformations, analyses, or code generators should need to + be modified because of debugging information. + +* LLVM optimizations should interact in :ref:`well-defined and easily described + ways ` with the debugging information. + +* Because LLVM is designed to support arbitrary programming languages, + LLVM-to-LLVM tools should not need to know anything about the semantics of + the source-level-language. + +* Source-level languages are often **widely** different from one another. + LLVM should not put any restrictions of the flavor of the source-language, + and the debugging information should work with any language. + +* With code generator support, it should be possible to use an LLVM compiler + to compile a program to native machine code and standard debugging + formats. 
This allows compatibility with traditional machine-code level + debuggers, like GDB or DBX. + +The approach used by the LLVM implementation is to use a small set of +:ref:`intrinsic functions ` to define a mapping +between LLVM program objects and the source-level objects. The description of +the source-level program is maintained in LLVM metadata in an +:ref:`implementation-defined format ` (the C/C++ front-end +currently uses working draft 7 of the `DWARF 3 standard +`_). + +When a program is being debugged, a debugger interacts with the user and turns +the stored debug information into source-language specific information. As +such, a debugger must be aware of the source-language, and is thus tied to a +specific language or family of languages. + +Debug information consumers +--------------------------- + +The role of debug information is to provide meta information normally stripped +away during the compilation process. This meta information provides an LLVM +user a relationship between generated code and the original program source +code. + +Currently, debug information is consumed by DwarfDebug to produce dwarf +information used by the gdb debugger. Other targets could use the same +information to produce stabs or other debug forms. + +It would also be reasonable to use debug information to feed profiling tools +for analysis of generated code, or, tools for reconstructing the original +source from generated code. + +TODO - expound a bit more. + +.. _intro_debugopt: + +Debugging optimized code +------------------------ + +An extremely high priority of LLVM debugging information is to make it interact +well with optimizations and analysis. In particular, the LLVM debug +information provides the following guarantees: + +* LLVM debug information **always provides information to accurately read + the source-level state of the program**, regardless of which LLVM + optimizations have been run, and without any modification to the + optimizations themselves. 
However, some optimizations may impact the + ability to modify the current state of the program with a debugger, such + as setting program variables, or calling functions that have been + deleted. + +* As desired, LLVM optimizations can be upgraded to be aware of the LLVM + debugging information, allowing them to update the debugging information + as they perform aggressive optimizations. This means that, with effort, + the LLVM optimizers could optimize debug code just as well as non-debug + code. + +* LLVM debug information does not prevent optimizations from + happening (for example inlining, basic block reordering/merging/cleanup, + tail duplication, etc). + +* LLVM debug information is automatically optimized along with the rest of + the program, using existing facilities. For example, duplicate + information is automatically merged by the linker, and unused information + is automatically removed. + +Basically, the debug information allows you to compile a program with +"``-O0 -g``" and get full debug information, allowing you to arbitrarily modify +the program as it executes from a debugger. Compiling a program with +"``-O3 -g``" gives you full debug information that is always available and +accurate for reading (e.g., you get accurate stack traces despite tail call +elimination and inlining), but you might lose the ability to modify the program +and call functions that were optimized out of the program, or inlined away +completely. + +:ref:`LLVM test suite <test-suite-quickstart>` provides a framework to test +optimizer's handling of debugging information. It can be run like this: + +.. code-block:: bash + + % cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level + % make TEST=dbgopt + +This will test the impact of debugging information on optimization passes. If +debugging information influences optimization passes then it will be reported +as a failure. See :doc:`TestingGuide` for more information on LLVM test +infrastructure and how to run various tests. + +..
_format: + +Debugging information format +============================ + +LLVM debugging information has been carefully designed to make it possible for +the optimizer to optimize the program and debugging information without +necessarily having to know anything about debugging information. In +particular, the use of metadata avoids duplicated debugging information from +the beginning, and the global dead code elimination pass automatically deletes +debugging information for a function if it decides to delete the function. + +To do this, most of the debugging information (descriptors for types, +variables, functions, source files, etc) is inserted by the language front-end +in the form of LLVM metadata. + +Debug information is designed to be agnostic about the target debugger and +debugging information representation (e.g. DWARF/Stabs/etc). It uses a generic +pass to decode the information that represents variables, types, functions, +namespaces, etc: this allows for arbitrary source-language semantics and +type-systems to be used, as long as there is a module written for the target +debugger to interpret the information. + +To provide basic functionality, the LLVM debugger does have to make some +assumptions about the source-level language being debugged, though it keeps +these to a minimum. The only common features that the LLVM debugger assumes +exist are :ref:`source files `, and :ref:`program objects +`. These abstract objects are used by a debugger to +form stack traces, show information about local variables, etc. + +This section of the documentation first describes the representation aspects +common to any source-language. :ref:`ccxx_frontend` describes the data layout +conventions used by the C and C++ front-ends. + +Debug information descriptors +----------------------------- + +In consideration of the complexity and volume of debug information, LLVM +provides a specification for well formed debug descriptors. 
+ +Consumers of LLVM debug information expect the descriptors for program objects +to start in a canonical format, but the descriptors can include additional +information appended at the end that is source-language specific. All LLVM +debugging information is versioned, allowing backwards compatibility in the +case that the core structures need to change in some way. Also, all debugging +information objects start with a tag to indicate what type of object it is. +The source-language is allowed to define its own objects, by using unreserved +tag numbers. We recommend using tags in the range 0x1000 through 0x2000 +(there is a defined ``enum DW_TAG_user_base = 0x1000``.) + +The fields of debug descriptors used internally by LLVM are restricted to only +the simple data types ``i32``, ``i1``, ``float``, ``double``, ``mdstring`` and +``mdnode``. + +.. code-block:: llvm + + !1 = metadata !{ + i32, ;; A tag + ... + } + +The first field of a descriptor is always an +``i32`` containing a tag value identifying the content of the descriptor. +The remaining fields are specific to the descriptor. The values of tags are +loosely bound to the tag values of DWARF information entries. However, that +does not restrict the use of the information supplied to DWARF targets. To +facilitate versioning of debug information, the tag is augmented with the +current debug version (``LLVMDebugVersion = 8 << 16`` or 0x80000 or +524288.) + +The details of the various descriptors follow. + +Compile unit descriptors +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !0 = metadata !{ + i32, ;; Tag = 17 + LLVMDebugVersion (DW_TAG_compile_unit) + i32, ;; Unused field. + i32, ;; DWARF language identifier (ex. DW_LANG_C89) + metadata, ;; Source file name + metadata, ;; Source file directory (includes trailing slash) + metadata ;; Producer (ex. "4.0.1 LLVM (LLVM research group)") + i1, ;; True if this is a main compile unit. + i1, ;; True if this is optimized.
+ metadata, ;; Flags + i32 ;; Runtime version + metadata ;; List of enums types + metadata ;; List of retained types + metadata ;; List of subprograms + metadata ;; List of global variables + } + +These descriptors contain a source language ID for the file (we use the DWARF +3.0 ID numbers, such as ``DW_LANG_C89``, ``DW_LANG_C_plus_plus``, +``DW_LANG_Cobol74``, etc), three strings describing the filename, working +directory of the compiler, and an identifier string for the compiler that +produced it. + +Compile unit descriptors provide the root context for objects declared in a +specific compilation unit. File descriptors are defined using this context. +These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They +keep track of subprograms, global variables and type information. + +.. _format_files: + +File descriptors +^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !0 = metadata !{ + i32, ;; Tag = 41 + LLVMDebugVersion (DW_TAG_file_type) + metadata, ;; Source file name + metadata, ;; Source file directory (includes trailing slash) + metadata ;; Unused + } + +These descriptors contain information for a file. Global variables and top +level functions would be defined using this context. File descriptors also +provide context for source line correspondence. + +Each input file is encoded as a separate file descriptor in LLVM debugging +information output. + +.. _format_global_variables: + +Global variable descriptors +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !1 = metadata !{ + i32, ;; Tag = 52 + LLVMDebugVersion (DW_TAG_variable) + i32, ;; Unused field. 
+ metadata, ;; Reference to context descriptor + metadata, ;; Name + metadata, ;; Display name (fully qualified C++ name) + metadata, ;; MIPS linkage name (for C++) + metadata, ;; Reference to file where defined + i32, ;; Line number where defined + metadata, ;; Reference to type descriptor + i1, ;; True if the global is local to compile unit (static) + i1, ;; True if the global is defined in the compile unit (not extern) + {}* ;; Reference to the global variable + } + +These descriptors provide debug information about global variables. They +provide details such as name, type and where the variable is defined. All +global variables are collected inside the named metadata ``!llvm.dbg.cu``. + +.. _format_subprograms: + +Subprogram descriptors +^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32, ;; Tag = 46 + LLVMDebugVersion (DW_TAG_subprogram) + i32, ;; Unused field. + metadata, ;; Reference to context descriptor + metadata, ;; Name + metadata, ;; Display name (fully qualified C++ name) + metadata, ;; MIPS linkage name (for C++) + metadata, ;; Reference to file where defined + i32, ;; Line number where defined + metadata, ;; Reference to type descriptor + i1, ;; True if the global is local to compile unit (static) + i1, ;; True if the global is defined in the compile unit (not extern) + i32, ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual + i32, ;; Index into a virtual function + metadata, ;; indicates which base type contains the vtable pointer for the + ;; derived class + i32, ;; Flags - Artificial, Private, Protected, Explicit, Prototyped. + i1, ;; isOptimized + Function * , ;; Pointer to LLVM function + metadata, ;; Lists function template parameters + metadata, ;; Function declaration descriptor + metadata, ;; List of function variables + i32 ;; Line number where the scope of the subprogram begins + } + +These descriptors provide debug information about functions, methods and +subprograms.
They provide details such as name, return types and the source +location where the subprogram is defined. + +Block descriptors +^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !3 = metadata !{ + i32, ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block) + metadata,;; Reference to context descriptor + i32, ;; Line number + i32, ;; Column number + metadata,;; Reference to source file + i32 ;; Unique ID to identify blocks from a template function + } + +This descriptor provides debug information about nested blocks within a +subprogram. The line number and column numbers are used to distinguish two +lexical blocks at the same depth. + +.. code-block:: llvm + + !3 = metadata !{ + i32, ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block) + metadata ;; Reference to the scope we're annotating with a file change + metadata,;; Reference to the file the scope is enclosed in. + } + +This descriptor provides a wrapper around a lexical scope to handle file +changes in the middle of a lexical block. + +.. _format_basic_type: + +Basic type descriptors +^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !4 = metadata !{ + i32, ;; Tag = 36 + LLVMDebugVersion (DW_TAG_base_type) + metadata, ;; Reference to context + metadata, ;; Name (may be "" for anonymous types) + metadata, ;; Reference to file where defined (may be NULL) + i32, ;; Line number where defined (may be 0) + i64, ;; Size in bits + i64, ;; Alignment in bits + i64, ;; Offset in bits + i32, ;; Flags + i32 ;; DWARF type encoding + } + +These descriptors define primitive types used in the code. Example ``int``, +``bool`` and ``float``. The context provides the scope of the type, which is +usually the top level. Since basic types are not usually user defined the +context and line number can be left as NULL and 0. The size, alignment and +offset are expressed in bits and can be 64 bit values. The alignment is used +to round the offset when embedded in a :ref:`composite type +<format_composite_type>` (example to keep float doubles on 64 bit boundaries).
+The offset is the bit offset if embedded in a :ref:`composite type +`. + +The type encoding provides the details of the type. The values are typically +one of the following: + +.. code-block:: llvm + + DW_ATE_address = 1 + DW_ATE_boolean = 2 + DW_ATE_float = 4 + DW_ATE_signed = 5 + DW_ATE_signed_char = 6 + DW_ATE_unsigned = 7 + DW_ATE_unsigned_char = 8 + +.. _format_derived_type: + +Derived type descriptors +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !5 = metadata !{ + i32, ;; Tag (see below) + metadata, ;; Reference to context + metadata, ;; Name (may be "" for anonymous types) + metadata, ;; Reference to file where defined (may be NULL) + i32, ;; Line number where defined (may be 0) + i64, ;; Size in bits + i64, ;; Alignment in bits + i64, ;; Offset in bits + i32, ;; Flags to encode attributes, e.g. private + metadata, ;; Reference to type derived from + metadata, ;; (optional) Name of the Objective C property associated with + ;; Objective-C an ivar, or the type of which this + ;; pointer-to-member is pointing to members of. + metadata, ;; (optional) Name of the Objective C property getter selector. + metadata, ;; (optional) Name of the Objective C property setter selector. + i32 ;; (optional) Objective C property attributes. + } + +These descriptors are used to define types derived from other types. The value +of the tag varies depending on the meaning. The following are possible tag +values: + +.. code-block:: llvm + + DW_TAG_formal_parameter = 5 + DW_TAG_member = 13 + DW_TAG_pointer_type = 15 + DW_TAG_reference_type = 16 + DW_TAG_typedef = 22 + DW_TAG_ptr_to_member_type = 31 + DW_TAG_const_type = 38 + DW_TAG_volatile_type = 53 + DW_TAG_restrict_type = 55 + +``DW_TAG_member`` is used to define a member of a :ref:`composite type +` or :ref:`subprogram `. The type +of the member is the :ref:`derived type `. +``DW_TAG_formal_parameter`` is used to define a member which is a formal +argument of a subprogram. 
+ +``DW_TAG_typedef`` is used to provide a name for the derived type. + +``DW_TAG_pointer_type``, ``DW_TAG_reference_type``, ``DW_TAG_const_type``, +``DW_TAG_volatile_type`` and ``DW_TAG_restrict_type`` are used to qualify the +:ref:`derived type `. + +:ref:`Derived type ` location can be determined from the +context and line number. The size, alignment and offset are expressed in bits +and can be 64 bit values. The alignment is used to round the offset when +embedded in a :ref:`composite type ` (example to keep +float doubles on 64 bit boundaries.) The offset is the bit offset if embedded +in a :ref:`composite type `. + +Note that the ``void *`` type is expressed as a type derived from NULL. + +.. _format_composite_type: + +Composite type descriptors +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !6 = metadata !{ + i32, ;; Tag (see below) + metadata, ;; Reference to context + metadata, ;; Name (may be "" for anonymous types) + metadata, ;; Reference to file where defined (may be NULL) + i32, ;; Line number where defined (may be 0) + i64, ;; Size in bits + i64, ;; Alignment in bits + i64, ;; Offset in bits + i32, ;; Flags + metadata, ;; Reference to type derived from + metadata, ;; Reference to array of member descriptors + i32 ;; Runtime languages + } + +These descriptors are used to define types that are composed of 0 or more +elements. The value of the tag varies depending on the meaning. The following +are possible tag values: + +.. code-block:: llvm + + DW_TAG_array_type = 1 + DW_TAG_enumeration_type = 4 + DW_TAG_structure_type = 19 + DW_TAG_union_type = 23 + DW_TAG_subroutine_type = 21 + DW_TAG_inheritance = 28 + +The vector flag indicates that an array type is a native packed vector. + +The members of array types (tag = ``DW_TAG_array_type``) are +:ref:`subrange descriptors `, each +representing the range of subscripts at that level of indexing. 
+ +The members of enumeration types (tag = ``DW_TAG_enumeration_type``) are +:ref:`enumerator descriptors <format_enumerator>`, each representing the +definition of an enumeration value for the set. All enumeration type descriptors +are collected inside the named metadata ``!llvm.dbg.cu``. + +The members of structure (tag = ``DW_TAG_structure_type``) or union (tag = +``DW_TAG_union_type``) types are any one of the :ref:`basic +<format_basic_type>`, :ref:`derived <format_derived_type>` or :ref:`composite +<format_composite_type>` type descriptors, each representing a field member of +the structure or union. + +For C++ classes (tag = ``DW_TAG_structure_type``), member descriptors provide +information about base classes, static members and member functions. If a +member is a :ref:`derived type descriptor <format_derived_type>` and has a tag +of ``DW_TAG_inheritance``, then the type represents a base class. If the member +is a :ref:`global variable descriptor <format_global_variables>` then it +represents a static member. And, if the member is a :ref:`subprogram +descriptor <format_subprograms>` then it represents a member function. For +static members and member functions, ``getName()`` returns the member's linkage name or +the C++ mangled name. ``getDisplayName()`` returns the simplified version of the name. + +The first member of subroutine (tag = ``DW_TAG_subroutine_type``) type elements +is the return type for the subroutine. The remaining elements are the formal +arguments to the subroutine. + +:ref:`Composite type <format_composite_type>` location can be determined from +the context and line number. The size, alignment and offset are expressed in +bits and can be 64 bit values. The alignment is used to round the offset when +embedded in a :ref:`composite type <format_composite_type>` (as an example, to +keep float doubles on 64 bit boundaries). The offset is the bit offset if +embedded in a :ref:`composite type <format_composite_type>`. + +.. _format_subrange: + +Subrange descriptors +^^^^^^^^^^^^^^^^^^^^ + +..
code-block:: llvm + + !42 = metadata !{ + i32, ;; Tag = 33 + LLVMDebugVersion (DW_TAG_subrange_type) + i64, ;; Low value + i64 ;; High value + } + +These descriptors are used to define ranges of array subscripts for an array +:ref:`composite type <format_composite_type>`. The low value defines the lower +bound, typically zero for C/C++. The high value is the upper bound. Values +are 64 bit. ``High - Low + 1`` is the size of the array. If ``Low > High`` +the array bounds are not included in generated debugging information. + +.. _format_enumerator: + +Enumerator descriptors +^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !6 = metadata !{ + i32, ;; Tag = 40 + LLVMDebugVersion (DW_TAG_enumerator) + metadata, ;; Name + i64 ;; Value + } + +These descriptors are used to define members of an enumeration :ref:`composite +type <format_composite_type>`; each associates a name with a value. + +Local variables +^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !7 = metadata !{ + i32, ;; Tag (see below) + metadata, ;; Context + metadata, ;; Name + metadata, ;; Reference to file where defined + i32, ;; 24 bit - Line number where defined + ;; 8 bit - Argument number. 1 indicates 1st argument. + metadata, ;; Type descriptor + i32, ;; flags + metadata ;; (optional) Reference to inline location + } + +These descriptors are used to define variables local to a sub program. The +value of the tag depends on the usage of the variable: + +.. code-block:: llvm + + DW_TAG_auto_variable = 256 + DW_TAG_arg_variable = 257 + +An auto variable is any variable declared in the body of the function. An +argument variable is any variable that appears as a formal argument to the +function. + +The context is either the subprogram or block where the variable is defined. +Name is the source variable name. Context and line indicate where the variable +was defined. Type descriptor defines the declared type of the variable. + +..
_format_common_intrinsics: + +Debugger intrinsic functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +LLVM uses several intrinsic functions (name prefixed with "``llvm.dbg``") to +provide debug information at various points in generated code. + +``llvm.dbg.declare`` +^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + void %llvm.dbg.declare(metadata, metadata) + +This intrinsic provides information about a local element (e.g., variable). +The first argument is metadata holding the alloca for the variable. The second +argument is metadata containing a description of the variable. + +``llvm.dbg.value`` +^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + void %llvm.dbg.value(metadata, i64, metadata) + +This intrinsic provides information when a user source variable is set to a new +value. The first argument is the new value (wrapped as metadata). The second +argument is the offset in the user source variable where the new value is +written. The third argument is metadata containing a description of the user +source variable. + +Object lifetimes and scoping +============================ + +In many languages, the local variables in functions can have their lifetimes or +scopes limited to a subset of a function. In the C family of languages, for +example, variables are only live (readable and writable) within the source +block that they are defined in. In functional languages, values are only +readable after they have been defined. Though this is a very obvious concept, +it is non-trivial to model in LLVM, because it has no notion of scoping in this +sense, and does not want to be tied to a language's scoping rules. + +In order to handle this, the LLVM debug format uses the metadata attached to +llvm instructions to encode line number and scoping information. Consider the +following C fragment, for example: + +.. code-block:: c + + 1. void foo() { + 2. int X = 21; + 3. int Y = 22; + 4. { + 5. int Z = 23; + 6. Z = X; + 7. } + 8. X = Y; + 9. 
} + +Compiled to LLVM, this function would be represented like this: + +.. code-block:: llvm + + define void @foo() nounwind ssp { + entry: + %X = alloca i32, align 4 ; [#uses=4] + %Y = alloca i32, align 4 ; [#uses=4] + %Z = alloca i32, align 4 ; [#uses=3] + %0 = bitcast i32* %X to {}* ; <{}*> [#uses=1] + call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7 + store i32 21, i32* %X, !dbg !8 + %1 = bitcast i32* %Y to {}* ; <{}*> [#uses=1] + call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10 + store i32 22, i32* %Y, !dbg !11 + %2 = bitcast i32* %Z to {}* ; <{}*> [#uses=1] + call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14 + store i32 23, i32* %Z, !dbg !15 + %tmp = load i32* %X, !dbg !16 ; [#uses=1] + %tmp1 = load i32* %Y, !dbg !16 ; [#uses=1] + %add = add nsw i32 %tmp, %tmp1, !dbg !16 ; [#uses=1] + store i32 %add, i32* %Z, !dbg !16 + %tmp2 = load i32* %Y, !dbg !17 ; [#uses=1] + store i32 %tmp2, i32* %X, !dbg !17 + ret void, !dbg !18 + } + + declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + + !0 = metadata !{i32 459008, metadata !1, metadata !"X", + metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ] + !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ] + !2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", + metadata !"foo", metadata !3, i32 1, metadata !4, + i1 false, i1 true}; [DW_TAG_subprogram ] + !3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c", + metadata !"/private/tmp", metadata !"clang 1.1", i1 true, + i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ] + !4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, + i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ] + !5 = metadata !{null} + !6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, + i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ] + !7 = metadata !{i32 2, i32 7, metadata !1, 
null} + !8 = metadata !{i32 2, i32 3, metadata !1, null} + !9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3, + metadata !6}; [ DW_TAG_auto_variable ] + !10 = metadata !{i32 3, i32 7, metadata !1, null} + !11 = metadata !{i32 3, i32 3, metadata !1, null} + !12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5, + metadata !6}; [ DW_TAG_auto_variable ] + !13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ] + !14 = metadata !{i32 5, i32 9, metadata !13, null} + !15 = metadata !{i32 5, i32 5, metadata !13, null} + !16 = metadata !{i32 6, i32 5, metadata !13, null} + !17 = metadata !{i32 8, i32 3, metadata !1, null} + !18 = metadata !{i32 9, i32 1, metadata !2, null} + +This example illustrates a few important details about LLVM debugging +information. In particular, it shows how the ``llvm.dbg.declare`` intrinsic and +location information, which are attached to an instruction, are applied +together to allow a debugger to analyze the relationship between statements, +variable definitions, and the code used to implement the function. + +.. code-block:: llvm + + call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7 + +The first intrinsic ``%llvm.dbg.declare`` encodes debugging information for the +variable ``X``. The metadata ``!dbg !7`` attached to the intrinsic provides +scope information for the variable ``X``. + +.. code-block:: llvm + + !7 = metadata !{i32 2, i32 7, metadata !1, null} + !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ] + !2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", + metadata !"foo", metadata !"foo", metadata !3, i32 1, + metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ] + +Here ``!7`` is metadata providing location information. It has four fields: +line number, column number, scope, and original scope. The original scope +represents inline location if this instruction is inlined inside a caller, and +is null otherwise. 
In this example, scope is encoded by ``!1``. ``!1`` +represents a lexical block inside the scope ``!2``, where ``!2`` is a +:ref:`subprogram descriptor <format_subprograms>`. This way the location +information attached to the intrinsics indicates that the variable ``X`` is +declared at line number 2 at a function level scope in function ``foo``. + +Now let's take another example. + +.. code-block:: llvm + + call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14 + +The second intrinsic ``%llvm.dbg.declare`` encodes debugging information for +variable ``Z``. The metadata ``!dbg !14`` attached to the intrinsic provides +scope information for the variable ``Z``. + +.. code-block:: llvm + + !13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ] + !14 = metadata !{i32 5, i32 9, metadata !13, null} + +Here ``!14`` indicates that ``Z`` is declared at line number 5 and +column number 9 inside of lexical scope ``!13``. The lexical scope itself +resides inside of lexical scope ``!1`` described above. + +The scope information attached to each instruction provides a straightforward +way to find instructions covered by a scope. + +.. _ccxx_frontend: + +C/C++ front-end specific debug information +========================================== + +The C and C++ front-ends represent information about the program in a format +that is effectively identical to `DWARF 3.0 +`_ in terms of information +content. This allows code generators to trivially support native debuggers by +generating standard dwarf information, and contains enough information for +non-dwarf targets to translate it as needed. + +This section describes the forms used to represent C and C++ programs. Other +languages could pattern themselves after this (which itself is tuned to +representing programs in the same way that DWARF 3 does), or they could choose +to provide completely different forms if they don't fit into the DWARF model. 
+As support for debugging information gets added to the various LLVM +source-language front-ends, the information used should be documented here. + +The following sections provide examples of various C/C++ constructs and the +debug information that would best describe those constructs. + +C/C++ source file information +----------------------------- + +Given the source files ``MySource.cpp`` and ``MyHeader.h`` located in the +directory ``/Users/mine/sources``, the following code: + +.. code-block:: c + + #include "MyHeader.h" + + int main(int argc, char *argv[]) { + return 0; + } + +a C/C++ front-end would generate the following descriptors: + +.. code-block:: llvm + + ... + ;; + ;; Define the compile unit for the main source file "/Users/mine/sources/MySource.cpp". + ;; + !2 = metadata !{ + i32 524305, ;; Tag + i32 0, ;; Unused + i32 4, ;; Language Id + metadata !"MySource.cpp", + metadata !"/Users/mine/sources", + metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)", + i1 true, ;; Main Compile Unit + i1 false, ;; Optimized compile unit + metadata !"", ;; Compiler flags + i32 0} ;; Runtime version + + ;; + ;; Define the file for the file "/Users/mine/sources/MySource.cpp". + ;; + !1 = metadata !{ + i32 524329, ;; Tag + metadata !"MySource.cpp", + metadata !"/Users/mine/sources", + metadata !2 ;; Compile unit + } + + ;; + ;; Define the file for the file "/Users/mine/sources/MyHeader.h" + ;; + !3 = metadata !{ + i32 524329, ;; Tag + metadata !"MyHeader.h", + metadata !"/Users/mine/sources", + metadata !2 ;; Compile unit + } + + ... + +``llvm::Instruction`` provides easy access to metadata attached to an +instruction. One can extract line number information encoded in LLVM IR using +``Instruction::getMetadata()`` and ``DILocation::getLineNumber()``. + +.. 
code-block:: c++ + + if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction + DILocation Loc(N); // DILocation is in DebugInfo.h + unsigned Line = Loc.getLineNumber(); + StringRef File = Loc.getFilename(); + StringRef Dir = Loc.getDirectory(); + } + +C/C++ global variable information +--------------------------------- + +Given an integer global variable declared as follows: + +.. code-block:: c + + int MyGlobal = 100; + +a C/C++ front-end would generate the following descriptors: + +.. code-block:: llvm + + ;; + ;; Define the global itself. + ;; + %MyGlobal = global int 100 + ... + ;; + ;; List of debug info of globals + ;; + !llvm.dbg.cu = !{!0} + + ;; Define the compile unit. + !0 = metadata !{ + i32 786449, ;; Tag + i32 0, ;; Context + i32 4, ;; Language + metadata !"foo.cpp", ;; File + metadata !"/Volumes/Data/tmp", ;; Directory + metadata !"clang version 3.1 ", ;; Producer + i1 true, ;; Deprecated field + i1 false, ;; "isOptimized"? + metadata !"", ;; Flags + i32 0, ;; Runtime Version + metadata !1, ;; Enum Types + metadata !1, ;; Retained Types + metadata !1, ;; Subprograms + metadata !3 ;; Global Variables + } ; [ DW_TAG_compile_unit ] + + ;; The Array of Global Variables + !3 = metadata !{ + metadata !4 + } + + !4 = metadata !{ + metadata !5 + } + + ;; + ;; Define the global variable itself. 
+ ;; + !5 = metadata !{ + i32 786484, ;; Tag + i32 0, ;; Unused + null, ;; Unused + metadata !"MyGlobal", ;; Name + metadata !"MyGlobal", ;; Display Name + metadata !"", ;; Linkage Name + metadata !6, ;; File + i32 1, ;; Line + metadata !7, ;; Type + i32 0, ;; IsLocalToUnit + i32 1, ;; IsDefinition + i32* @MyGlobal ;; LLVM-IR Value + } ; [ DW_TAG_variable ] + + ;; + ;; Define the file + ;; + !6 = metadata !{ + i32 786473, ;; Tag + metadata !"foo.cpp", ;; File + metadata !"/Volumes/Data/tmp", ;; Directory + null ;; Unused + } ; [ DW_TAG_file_type ] + + ;; + ;; Define the type + ;; + !7 = metadata !{ + i32 786468, ;; Tag + null, ;; Unused + metadata !"int", ;; Name + null, ;; Unused + i32 0, ;; Line + i64 32, ;; Size in Bits + i64 32, ;; Align in Bits + i64 0, ;; Offset + i32 0, ;; Flags + i32 5 ;; Encoding + } ; [ DW_TAG_base_type ] + +C/C++ function information +-------------------------- + +Given a function declared as follows: + +.. code-block:: c + + int main(int argc, char *argv[]) { + return 0; + } + +a C/C++ front-end would generate the following descriptors: + +.. code-block:: llvm + + ;; + ;; Define the anchor for subprograms. Note that the second field of the + ;; anchor is 46, which is the same as the tag for subprograms + ;; (46 = DW_TAG_subprogram.) + ;; + !6 = metadata !{ + i32 524334, ;; Tag + i32 0, ;; Unused + metadata !1, ;; Context + metadata !"main", ;; Name + metadata !"main", ;; Display name + metadata !"main", ;; Linkage name + metadata !1, ;; File + i32 1, ;; Line number + metadata !4, ;; Type + i1 false, ;; Is local + i1 true, ;; Is definition + i32 0, ;; Virtuality attribute, e.g. pure virtual function + i32 0, ;; Index into virtual table for C++ methods + i32 0, ;; Type that holds virtual table. + i32 0, ;; Flags + i1 false, ;; True if this function is optimized + Function *, ;; Pointer to llvm::Function + null ;; Function template parameters + } + ;; + ;; Define the subprogram itself. + ;; + define i32 @main(i32 %argc, i8** %argv) { + ... 
+ } + +C/C++ basic types +----------------- + +The following are the basic type descriptors for C/C++ core types: + +bool +^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"bool", ;; Name + metadata !1, ;; File + i32 0, ;; Line number + i64 8, ;; Size in Bits + i64 8, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 2 ;; Encoding + } + +char +^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"char", ;; Name + metadata !1, ;; File + i32 0, ;; Line number + i64 8, ;; Size in Bits + i64 8, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 6 ;; Encoding + } + +unsigned char +^^^^^^^^^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"unsigned char", + metadata !1, ;; File + i32 0, ;; Line number + i64 8, ;; Size in Bits + i64 8, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 8 ;; Encoding + } + +short +^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"short int", + metadata !1, ;; File + i32 0, ;; Line number + i64 16, ;; Size in Bits + i64 16, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 5 ;; Encoding + } + +unsigned short +^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"short unsigned int", + metadata !1, ;; File + i32 0, ;; Line number + i64 16, ;; Size in Bits + i64 16, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 7 ;; Encoding + } + +int +^^^ + +.. 
code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"int", ;; Name + metadata !1, ;; File + i32 0, ;; Line number + i64 32, ;; Size in Bits + i64 32, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 5 ;; Encoding + } + +unsigned int +^^^^^^^^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"unsigned int", + metadata !1, ;; File + i32 0, ;; Line number + i64 32, ;; Size in Bits + i64 32, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 7 ;; Encoding + } + +long long +^^^^^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"long long int", + metadata !1, ;; File + i32 0, ;; Line number + i64 64, ;; Size in Bits + i64 64, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 5 ;; Encoding + } + +unsigned long long +^^^^^^^^^^^^^^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"long long unsigned int", + metadata !1, ;; File + i32 0, ;; Line number + i64 64, ;; Size in Bits + i64 64, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 7 ;; Encoding + } + +float +^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"float", + metadata !1, ;; File + i32 0, ;; Line number + i64 32, ;; Size in Bits + i64 32, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 4 ;; Encoding + } + +double +^^^^^^ + +.. code-block:: llvm + + !2 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"double",;; Name + metadata !1, ;; File + i32 0, ;; Line number + i64 64, ;; Size in Bits + i64 64, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 4 ;; Encoding + } + +C/C++ derived types +------------------- + +Given the following as an example of C/C++ derived type: + +.. 
code-block:: c + + typedef const int *IntPtr; + +a C/C++ front-end would generate the following descriptors: + +.. code-block:: llvm + + ;; + ;; Define the typedef "IntPtr". + ;; + !2 = metadata !{ + i32 524310, ;; Tag + metadata !1, ;; Context + metadata !"IntPtr", ;; Name + metadata !3, ;; File + i32 0, ;; Line number + i64 0, ;; Size in bits + i64 0, ;; Align in bits + i64 0, ;; Offset in bits + i32 0, ;; Flags + metadata !4 ;; Derived From type + } + ;; + ;; Define the pointer type. + ;; + !4 = metadata !{ + i32 524303, ;; Tag + metadata !1, ;; Context + metadata !"", ;; Name + metadata !1, ;; File + i32 0, ;; Line number + i64 64, ;; Size in bits + i64 64, ;; Align in bits + i64 0, ;; Offset in bits + i32 0, ;; Flags + metadata !5 ;; Derived From type + } + ;; + ;; Define the const type. + ;; + !5 = metadata !{ + i32 524326, ;; Tag + metadata !1, ;; Context + metadata !"", ;; Name + metadata !1, ;; File + i32 0, ;; Line number + i64 32, ;; Size in bits + i64 32, ;; Align in bits + i64 0, ;; Offset in bits + i32 0, ;; Flags + metadata !6 ;; Derived From type + } + ;; + ;; Define the int type. + ;; + !6 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"int", ;; Name + metadata !1, ;; File + i32 0, ;; Line number + i64 32, ;; Size in bits + i64 32, ;; Align in bits + i64 0, ;; Offset in bits + i32 0, ;; Flags + 5 ;; Encoding + } + +C/C++ struct/union types +------------------------ + +Given the following as an example of C/C++ struct type: + +.. code-block:: c + + struct Color { + unsigned Red; + unsigned Green; + unsigned Blue; + }; + +a C/C++ front-end would generate the following descriptors: + +.. code-block:: llvm + + ;; + ;; Define basic type for unsigned int. 
+ ;; + !5 = metadata !{ + i32 524324, ;; Tag + metadata !1, ;; Context + metadata !"unsigned int", + metadata !1, ;; File + i32 0, ;; Line number + i64 32, ;; Size in Bits + i64 32, ;; Align in Bits + i64 0, ;; Offset in Bits + i32 0, ;; Flags + i32 7 ;; Encoding + } + ;; + ;; Define composite type for struct Color. + ;; + !2 = metadata !{ + i32 524307, ;; Tag + metadata !1, ;; Context + metadata !"Color", ;; Name + metadata !1, ;; Compile unit + i32 1, ;; Line number + i64 96, ;; Size in bits + i64 32, ;; Align in bits + i64 0, ;; Offset in bits + i32 0, ;; Flags + null, ;; Derived From + metadata !3, ;; Elements + i32 0 ;; Runtime Language + } + + ;; + ;; Define the Red field. + ;; + !4 = metadata !{ + i32 524301, ;; Tag + metadata !1, ;; Context + metadata !"Red", ;; Name + metadata !1, ;; File + i32 2, ;; Line number + i64 32, ;; Size in bits + i64 32, ;; Align in bits + i64 0, ;; Offset in bits + i32 0, ;; Flags + metadata !5 ;; Derived From type + } + + ;; + ;; Define the Green field. + ;; + !6 = metadata !{ + i32 524301, ;; Tag + metadata !1, ;; Context + metadata !"Green", ;; Name + metadata !1, ;; File + i32 3, ;; Line number + i64 32, ;; Size in bits + i64 32, ;; Align in bits + i64 32, ;; Offset in bits + i32 0, ;; Flags + metadata !5 ;; Derived From type + } + + ;; + ;; Define the Blue field. + ;; + !7 = metadata !{ + i32 524301, ;; Tag + metadata !1, ;; Context + metadata !"Blue", ;; Name + metadata !1, ;; File + i32 4, ;; Line number + i64 32, ;; Size in bits + i64 32, ;; Align in bits + i64 64, ;; Offset in bits + i32 0, ;; Flags + metadata !5 ;; Derived From type + } + + ;; + ;; Define the array of fields used by the composite type Color. + ;; + !3 = metadata !{metadata !4, metadata !6, metadata !7} + +C/C++ enumeration types +----------------------- + +Given the following as an example of C/C++ enumeration type: + +.. 
code-block:: c + + enum Trees { + Spruce = 100, + Oak = 200, + Maple = 300 + }; + +a C/C++ front-end would generate the following descriptors: + +.. code-block:: llvm + + ;; + ;; Define composite type for enum Trees + ;; + !2 = metadata !{ + i32 524292, ;; Tag + metadata !1, ;; Context + metadata !"Trees", ;; Name + metadata !1, ;; File + i32 1, ;; Line number + i64 32, ;; Size in bits + i64 32, ;; Align in bits + i64 0, ;; Offset in bits + i32 0, ;; Flags + null, ;; Derived From type + metadata !3, ;; Elements + i32 0 ;; Runtime language + } + + ;; + ;; Define the array of enumerators used by composite type Trees. + ;; + !3 = metadata !{metadata !4, metadata !5, metadata !6} + + ;; + ;; Define Spruce enumerator. + ;; + !4 = metadata !{i32 524328, metadata !"Spruce", i64 100} + + ;; + ;; Define Oak enumerator. + ;; + !5 = metadata !{i32 524328, metadata !"Oak", i64 200} + + ;; + ;; Define Maple enumerator. + ;; + !6 = metadata !{i32 524328, metadata !"Maple", i64 300} + +Debugging information format +============================ + +Debugging Information Extension for Objective C Properties +---------------------------------------------------------- + +Introduction +^^^^^^^^^^^^ + +Objective C provides a simpler way to declare and define accessor methods using +declared properties. The language provides features to declare a property and +to let compiler synthesize accessor methods. + +The debugger lets developer inspect Objective C interfaces and their instance +variables and class variables. However, the debugger does not know anything +about the properties defined in Objective C interfaces. The debugger consumes +information generated by compiler in DWARF format. The format does not support +encoding of Objective C properties. This proposal describes DWARF extensions to +encode Objective C properties, which the debugger can use to let developers +inspect Objective C properties. + +Proposal +^^^^^^^^ + +Objective C properties exist separately from class members. 
A property can be +defined only by "setter" and "getter" selectors, and be calculated anew on each +access. Or a property can just be a direct access to some declared ivar. +Finally it can have an ivar "automatically synthesized" for it by the compiler, +in which case the property can be referred to in user code directly using the +standard C dereference syntax as well as through the property "dot" syntax, but +there is no entry in the ``@interface`` declaration corresponding to this ivar. + +To facilitate debugging these properties, we will add a new DWARF TAG into the +``DW_TAG_structure_type`` definition for the class to hold the description of a +given property, and a set of DWARF attributes that provide said description. +The property tag will also contain the name and declared type of the property. + +If there is a related ivar, there will also be a DWARF property attribute placed +in the ``DW_TAG_member`` DIE for that ivar referring back to the property TAG +for that property. And in the case where the compiler synthesizes the ivar +directly, the compiler is expected to generate a ``DW_TAG_member`` for that +ivar (with the ``DW_AT_artificial`` set to 1), whose name will be the name used +to access this ivar directly in code, and with the property attribute pointing +back to the property it is backing. + +The following examples will serve as illustration for our discussion: + +.. code-block:: objc + + @interface I1 { + int n2; + } + + @property int p1; + @property int p2; + @end + + @implementation I1 + @synthesize p1; + @synthesize p2 = n2; + @end + +This produces the following DWARF (this is a "pseudo dwarfdump" output): + +.. 
code-block:: none + + 0x00000100: TAG_structure_type [7] * + AT_APPLE_runtime_class( 0x10 ) + AT_name( "I1" ) + AT_decl_file( "Objc_Property.m" ) + AT_decl_line( 3 ) + + 0x00000110 TAG_APPLE_property + AT_name ( "p1" ) + AT_type ( {0x00000150} ( int ) ) + + 0x00000120: TAG_APPLE_property + AT_name ( "p2" ) + AT_type ( {0x00000150} ( int ) ) + + 0x00000130: TAG_member [8] + AT_name( "_p1" ) + AT_APPLE_property ( {0x00000110} "p1" ) + AT_type( {0x00000150} ( int ) ) + AT_artificial ( 0x1 ) + + 0x00000140: TAG_member [8] + AT_name( "n2" ) + AT_APPLE_property ( {0x00000120} "p2" ) + AT_type( {0x00000150} ( int ) ) + + 0x00000150: AT_type( ( int ) ) + +Note, the current convention is that the name of the ivar for an +auto-synthesized property is the name of the property from which it derives +with an underscore prepended, as is shown in the example. But we actually +don't need to know this convention, since we are given the name of the ivar +directly. + +Also, it is common practice in ObjC to have different property declarations in +the @interface and @implementation - e.g. to provide a read-only property in +the interface, and a read-write interface in the implementation. In that case, +the compiler should emit whichever property declaration will be in force in the +current translation unit. + +Developers can decorate a property with attributes which are encoded using +``DW_AT_APPLE_property_attribute``. + +.. code-block:: objc + + @property (readonly, nonatomic) int pr; + +.. code-block:: none + + TAG_APPLE_property [8] + AT_name( "pr" ) + AT_type ( {0x00000147} (int) ) + AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic) + +The setter and getter method names are attached to the property using +``DW_AT_APPLE_property_setter`` and ``DW_AT_APPLE_property_getter`` attributes. + +.. 
code-block:: objc + + @interface I1 + @property (setter=myOwnP3Setter:) int p3; + -(void)myOwnP3Setter:(int)a; + @end + + @implementation I1 + @synthesize p3; + -(void)myOwnP3Setter:(int)a{ } + @end + +The DWARF for this would be: + +.. code-block:: none + + 0x000003bd: TAG_structure_type [7] * + AT_APPLE_runtime_class( 0x10 ) + AT_name( "I1" ) + AT_decl_file( "Objc_Property.m" ) + AT_decl_line( 3 ) + + 0x000003cd TAG_APPLE_property + AT_name ( "p3" ) + AT_APPLE_property_setter ( "myOwnP3Setter:" ) + AT_type( {0x00000147} ( int ) ) + + 0x000003f3: TAG_member [8] + AT_name( "_p3" ) + AT_type ( {0x00000147} ( int ) ) + AT_APPLE_property ( {0x000003cd} ) + AT_artificial ( 0x1 ) + +New DWARF Tags +^^^^^^^^^^^^^^ + ++-----------------------+--------+ +| TAG | Value | ++=======================+========+ +| DW_TAG_APPLE_property | 0x4200 | ++-----------------------+--------+ + +New DWARF Attributes +^^^^^^^^^^^^^^^^^^^^ + ++--------------------------------+--------+-----------+ +| Attribute | Value | Classes | ++================================+========+===========+ +| DW_AT_APPLE_property | 0x3fed | Reference | ++--------------------------------+--------+-----------+ +| DW_AT_APPLE_property_getter | 0x3fe9 | String | ++--------------------------------+--------+-----------+ +| DW_AT_APPLE_property_setter | 0x3fea | String | ++--------------------------------+--------+-----------+ +| DW_AT_APPLE_property_attribute | 0x3feb | Constant | ++--------------------------------+--------+-----------+ + +New DWARF Constants +^^^^^^^^^^^^^^^^^^^ + ++--------------------------------+-------+ +| Name | Value | ++================================+=======+ +| DW_AT_APPLE_PROPERTY_readonly | 0x1 | ++--------------------------------+-------+ +| DW_AT_APPLE_PROPERTY_readwrite | 0x2 | ++--------------------------------+-------+ +| DW_AT_APPLE_PROPERTY_assign | 0x4 | ++--------------------------------+-------+ +| DW_AT_APPLE_PROPERTY_retain | 0x8 | ++--------------------------------+-------+ 
+| DW_AT_APPLE_PROPERTY_copy | 0x10 | ++--------------------------------+-------+ +| DW_AT_APPLE_PROPERTY_nonatomic | 0x20 | ++--------------------------------+-------+ + +Name Accelerator Tables +----------------------- + +Introduction +^^^^^^^^^^^^ + +The "``.debug_pubnames``" and "``.debug_pubtypes``" formats are not what a +debugger needs. The "``pub``" in the section name indicates that the entries +in the table are publicly visible names only. This means no static or hidden +functions show up in the "``.debug_pubnames``". No static variables or private +class variables are in the "``.debug_pubtypes``". Many compilers add different +things to these tables, so we can't rely upon the contents between gcc, icc, or +clang. + +The typical query given by users tends not to match up with the contents of +these tables. For example, the DWARF spec states that "In the case of the name +of a function member or static data member of a C++ structure, class or union, +the name presented in the "``.debug_pubnames``" section is not the simple name +given by the ``DW_AT_name`` attribute of the referenced debugging information +entry, but rather the fully qualified name of the data or function member." +So the only name in these tables for a complex C++ entry is a fully +qualified name. Debugger users tend not to enter their search strings as +"``a::b::c(int,const Foo&) const``", but rather as "``c``", "``b::c``", or +"``a::b::c``". So the name entered in the name table must be demangled in +order to chop it up appropriately and additional names must be manually entered +into the table to make it effective as a name lookup table for debuggers to +use. + +All debuggers currently ignore the "``.debug_pubnames``" table as a result of +its inconsistent and useless public-only name content making it a waste of +space in the object file. These tables, when they are written to disk, are not +sorted in any way, leaving every debugger to do its own parsing and sorting. 
+These tables also include an inlined copy of the string values in the table +itself making the tables much larger than they need to be on disk, especially +for large C++ programs. + +Can't we just fix the sections by adding all of the names we need to this +table? No, because that is not what the tables are defined to contain and we +won't know the difference between the old bad tables and the new good tables. +At best we could make our own renamed sections that contain all of the data we +need. + +These tables are also insufficient for what a debugger like LLDB needs. LLDB +uses clang for its expression parsing where LLDB acts as a PCH. LLDB is then +often asked to look for type "``foo``" or namespace "``bar``", or list items in +namespace "``baz``". Namespaces are not included in the pubnames or pubtypes +tables. Since clang asks a lot of questions when it is parsing an expression, +we need to be very fast when looking up names, as it happens a lot. Having new +accelerator tables that are optimized for very quick lookups will benefit this +type of debugging experience greatly. + +We would like to generate name lookup tables that can be mapped into memory +from disk, and used as is, with little or no up-front parsing. We would also +be able to control the exact content of these different tables so they contain +exactly what we need. The Name Accelerator Tables were designed to fix these +issues. In order to solve these issues we need to: + +* Have a format that can be mapped into memory from disk and used as is +* Lookups should be very fast +* Extensible table format so these tables can be made by many producers +* Contain all of the names needed for typical lookups out of the box +* Strict rules for the contents of tables + +Table size is important and the accelerator table format should allow the reuse +of strings from common string tables so the strings for the names are not +duplicated. 
We also want to make sure the table is ready to be used as-is by +simply mapping the table into memory with minimal header parsing. + +The name lookups need to be fast and optimized for the kinds of lookups that +debuggers tend to do. Optimally we would like to touch as few parts of the +mapped table as possible when doing a name lookup and be able to quickly find +the name entry we are looking for, or discover there are no matches. In the +case of debuggers we optimized for lookups that fail most of the time. + +Each table that is defined should have strict rules on exactly what is in the +accelerator tables and documented so clients can rely on the content. + +Hash Tables +^^^^^^^^^^^ + +Standard Hash Tables +"""""""""""""""""""" + +Typical hash tables have a header, buckets, and each bucket points to the +bucket contents: + +.. code-block:: none + + .------------. + | HEADER | + |------------| + | BUCKETS | + |------------| + | DATA | + `------------' + +The BUCKETS are an array of offsets to DATA for each hash: + +.. code-block:: none + + .------------. + | 0x00001000 | BUCKETS[0] + | 0x00002000 | BUCKETS[1] + | 0x00002200 | BUCKETS[2] + | 0x000034f0 | BUCKETS[3] + | | ... + | 0xXXXXXXXX | BUCKETS[n_buckets] + '------------' + +So for ``bucket[3]`` in the example above, we have an offset into the table +0x000034f0 which points to a chain of entries for the bucket. Each bucket must +contain a next pointer, full 32 bit hash value, the string itself, and the data +for the current string value. + +.. code-block:: none + + .------------. 
+ 0x000034f0: | 0x00003500 | next pointer + | 0x12345678 | 32 bit hash + | "erase" | string value + | data[n] | HashData for this bucket + |------------| + 0x00003500: | 0x00003550 | next pointer + | 0x29273623 | 32 bit hash + | "dump" | string value + | data[n] | HashData for this bucket + |------------| + 0x00003550: | 0x00000000 | next pointer + | 0x82638293 | 32 bit hash + | "main" | string value + | data[n] | HashData for this bucket + `------------' + +The problem with this layout for debuggers is that we need to optimize for the +negative lookup case where the symbol we're searching for is not present. So +if we were to look up "``printf``" in the table above, we would make a 32 bit hash +for "``printf``", which might match ``bucket[3]``. We would need to go to the +offset 0x000034f0 and start looking to see if our 32 bit hash matches. To do +so, we need to read the next pointer, then read the hash, compare it, and skip +to the next bucket. Each time we are skipping many bytes in memory and +touching new cache pages just to do the compare on the full 32 bit hash. All +of these accesses then tell us that we didn't have a match. + +Name Hash Tables +"""""""""""""""" + +To solve the issues mentioned above we have structured the hash tables a bit +differently: a header, buckets, an array of all unique 32 bit hash values, +followed by an array of hash value data offsets, one for each hash value, then +the data for all hash values: + +.. code-block:: none + + .-------------. + | HEADER | + |-------------| + | BUCKETS | + |-------------| + | HASHES | + |-------------| + | OFFSETS | + |-------------| + | DATA | + `-------------' + +The ``BUCKETS`` in the name tables are an index into the ``HASHES`` array. By +making all of the full 32 bit hash values contiguous in memory, we allow +ourselves to efficiently check for a match while touching as little memory as +possible. Most often checking the 32 bit hash values is as far as the lookup +goes. 
If it does match, it usually is a match with no collisions. So for a +table with "``n_buckets``" buckets, and "``n_hashes``" unique 32 bit hash +values, we can clarify the contents of the ``BUCKETS``, ``HASHES`` and +``OFFSETS`` as: + +.. code-block:: none + + .-------------------------. + | HEADER.magic | uint32_t + | HEADER.version | uint16_t + | HEADER.hash_function | uint16_t + | HEADER.bucket_count | uint32_t + | HEADER.hashes_count | uint32_t + | HEADER.header_data_len | uint32_t + | HEADER_DATA | HeaderData + |-------------------------| + | BUCKETS | uint32_t[n_buckets] // 32 bit hash indexes + |-------------------------| + | HASHES | uint32_t[n_hashes] // 32 bit hash values + |-------------------------| + | OFFSETS | uint32_t[n_hashes] // 32 bit offsets to hash value data + |-------------------------| + | ALL HASH DATA | + `-------------------------' + +So taking the exact same data from the standard hash example above we end up +with: + +.. code-block:: none + + .------------. + | HEADER | + |------------| + | 0 | BUCKETS[0] + | 2 | BUCKETS[1] + | 5 | BUCKETS[2] + | 6 | BUCKETS[3] + | | ... + | ... | BUCKETS[n_buckets] + |------------| + | 0x........ | HASHES[0] + | 0x........ | HASHES[1] + | 0x........ | HASHES[2] + | 0x........ | HASHES[3] + | 0x........ | HASHES[4] + | 0x........ | HASHES[5] + | 0x12345678 | HASHES[6] hash for BUCKETS[3] + | 0x29273623 | HASHES[7] hash for BUCKETS[3] + | 0x82638293 | HASHES[8] hash for BUCKETS[3] + | 0x........ | HASHES[9] + | 0x........ | HASHES[10] + | 0x........ | HASHES[11] + | 0x........ | HASHES[12] + | 0x........ | HASHES[13] + | 0x........ | HASHES[n_hashes] + |------------| + | 0x........ | OFFSETS[0] + | 0x........ | OFFSETS[1] + | 0x........ | OFFSETS[2] + | 0x........ | OFFSETS[3] + | 0x........ | OFFSETS[4] + | 0x........ | OFFSETS[5] + | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3] + | 0x00003500 | OFFSETS[7] offset for BUCKETS[3] + | 0x00003550 | OFFSETS[8] offset for BUCKETS[3] + | 0x........ 
| OFFSETS[9] + | 0x........ | OFFSETS[10] + | 0x........ | OFFSETS[11] + | 0x........ | OFFSETS[12] + | 0x........ | OFFSETS[13] + | 0x........ | OFFSETS[n_hashes] + |------------| + | | + | | + | | + | | + | | + |------------| + 0x000034f0: | 0x00001203 | .debug_str ("erase") + | 0x00000004 | A 32 bit array count - number of HashData with name "erase" + | 0x........ | HashData[0] + | 0x........ | HashData[1] + | 0x........ | HashData[2] + | 0x........ | HashData[3] + | 0x00000000 | String offset into .debug_str (terminate data for hash) + |------------| + 0x00003500: | 0x00001203 | String offset into .debug_str ("collision") + | 0x00000002 | A 32 bit array count - number of HashData with name "collision" + | 0x........ | HashData[0] + | 0x........ | HashData[1] + | 0x00001203 | String offset into .debug_str ("dump") + | 0x00000003 | A 32 bit array count - number of HashData with name "dump" + | 0x........ | HashData[0] + | 0x........ | HashData[1] + | 0x........ | HashData[2] + | 0x00000000 | String offset into .debug_str (terminate data for hash) + |------------| + 0x00003550: | 0x00001203 | String offset into .debug_str ("main") + | 0x00000009 | A 32 bit array count - number of HashData with name "main" + | 0x........ | HashData[0] + | 0x........ | HashData[1] + | 0x........ | HashData[2] + | 0x........ | HashData[3] + | 0x........ | HashData[4] + | 0x........ | HashData[5] + | 0x........ | HashData[6] + | 0x........ | HashData[7] + | 0x........ | HashData[8] + | 0x00000000 | String offset into .debug_str (terminate data for hash) + `------------' + +So we still have all of the same data, we just organize it more efficiently for +debugger lookup. If we repeat the same "``printf``" lookup from above, we +would hash "``printf``" and find it matches ``BUCKETS[3]`` by taking the 32 bit +hash value and modulo it by ``n_buckets``. ``BUCKETS[3]`` contains "6" which +is the index into the ``HASHES`` table. 
We would then compare any consecutive +32 bit hash values in the ``HASHES`` array as long as the hashes would be in +``BUCKETS[3]``. We do this by verifying that each subsequent hash value modulo +``n_buckets`` is still 3. In the case of a failed lookup we would access the +memory for ``BUCKETS[3]``, and then compare a few consecutive 32 bit hashes +before we know that we have no match. We don't end up marching through +multiple words of memory and we really keep the number of processor data cache +lines being accessed as small as possible. + +The string hash that is used for these lookup tables is the Daniel J. +Bernstein hash which is also used in the ELF ``GNU_HASH`` sections. It is a +very good hash for all kinds of names in programs with very few hash +collisions. + +Empty buckets are designated by using an invalid hash index of ``UINT32_MAX``. + +Details +^^^^^^^ + +These name hash tables are designed to be generic where specializations of the +table get to define additional data that goes into the header ("``HeaderData``"), +how the string value is stored ("``KeyType``") and the content of the data for each +hash value. + +Header Layout +""""""""""""" + +The header has a fixed part, and the specialized part. The exact format of the +header is: + +..
code-block:: c + + struct Header + { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number + uint16_t hash_function; // The hash function enumeration that was used + uint32_t bucket_count; // The number of buckets in this hash table + uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table + uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment + // Specifically the length of the following HeaderData field - this does not + // include the size of the preceding fields + HeaderData header_data; // Implementation specific header data + }; + +The header starts with a 32 bit "``magic``" value which must be ``'HASH'`` +encoded as an ASCII integer. This allows the detection of the start of the +hash table and also allows the table's byte order to be determined so the table +can be correctly extracted. The "``magic``" value is followed by a 16 bit +``version`` number which allows the table to be revised and modified in the +future. The current version number is 1. ``hash_function`` is a ``uint16_t`` +enumeration that specifies which hash function was used to produce this table. +The current values for the hash function enumerations include: + +.. code-block:: c + + enum HashFunctionType + { + eHashFunctionDJB = 0u, // Daniel J Bernstein hash function + }; + +``bucket_count`` is a 32 bit unsigned integer that represents how many buckets +are in the ``BUCKETS`` array. ``hashes_count`` is the number of unique 32 bit +hash values that are in the ``HASHES`` array, and is also the number of offsets +contained in the ``OFFSETS`` array. ``header_data_len`` specifies the size +in bytes of the ``HeaderData`` that is filled in by specialized versions of +this table. + +Fixed Lookup +"""""""""""" + +The header is followed by the buckets, hashes, offsets, and hash value data. + +..
code-block:: c + + struct FixedTable + { + uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below + uint32_t hashes [Header.hashes_count]; // Every unique 32 bit hash for the entire table is in this table + uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above + }; + +``buckets`` is an array of 32 bit indexes into the ``hashes`` array. The +``hashes`` array contains all of the 32 bit hash values for all names in the +hash table. Each hash in the ``hashes`` table has an offset in the ``offsets`` +array that points to the data for the hash value. + +This table setup makes it very easy to repurpose these tables to contain +different data, while keeping the lookup mechanism the same for all tables. +This layout also makes it possible to save the table to disk and map it in +later and do very efficient name lookups with little or no parsing. + +DWARF lookup tables can be implemented in a variety of ways and can store a lot +of information for each name. We want to make the DWARF tables extensible and +able to store the data efficiently so we have used some of the DWARF features +that enable efficient data storage to define exactly what kind of data we store +for each name. + +The ``HeaderData`` contains a definition of the contents of each HashData chunk. +We might want to store an offset to all of the debug information entries (DIEs) +for each name. To keep things extensible, we create a list of items, or +Atoms, that are contained in the data for each name. First comes the type of +the data in each atom: + +.. 
code-block:: c + + enum AtomType + { + eAtomTypeNULL = 0u, + eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding + eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that contains the item in question + eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2 + eAtomTypeNameFlags = 4u, // Flags from enum NameFlags + eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags + }; + +The enumeration values and their meanings are: + +.. code-block:: none + + eAtomTypeNULL - a termination atom that specifies the end of the atom list + eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name + eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE + eAtomTypeTag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is + eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...) + eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...) + +Then we allow each atom to define its atom type and how the data for that +atom type is encoded: + +.. code-block:: c + + struct Atom + { + uint16_t type; // AtomType enum value + uint16_t form; // DWARF DW_FORM_XXX defines + }; + +The ``form`` type above is from the DWARF specification and defines the exact +encoding of the data for the Atom type. See the DWARF specification for the +``DW_FORM_`` definitions. + +.. code-block:: c + + struct HeaderData + { + uint32_t die_offset_base; + uint32_t atom_count; + Atom atoms[atom_count]; + }; + +``HeaderData`` defines the base DIE offset that should be added to any atoms +that are encoded using the ``DW_FORM_ref1``, ``DW_FORM_ref2``, +``DW_FORM_ref4``, ``DW_FORM_ref8`` or ``DW_FORM_ref_udata``.
It also defines +what is contained in each ``HashData`` object -- ``Atom.form`` tells us how large +each field will be in the ``HashData`` and the ``Atom.type`` tells us how this data +should be interpreted. + +For the current implementations of the "``.apple_names``" (all functions + +globals), the "``.apple_types``" (names of all types that are defined), and +the "``.apple_namespaces``" (all namespaces), we currently set the ``Atom`` +array to be: + +.. code-block:: c + + HeaderData.atom_count = 1; + HeaderData.atoms[0].type = eAtomTypeDIEOffset; + HeaderData.atoms[0].form = DW_FORM_data4; + +This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is +encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have +multiple matching DIEs in a single file, which could come up with an inlined +function for instance. Future tables could include more information about the +DIE such as flags indicating if the DIE is a function, method, block, +or inlined. + +The KeyType for the DWARF table is a 32 bit string table offset into the +".debug_str" table. The ".debug_str" is the string table for the DWARF which +may already contain copies of all of the strings. This helps make sure, with +help from the compiler, that we reuse the strings between all of the DWARF +sections and keeps the hash table size down. Another benefit to having the +compiler generate all strings as DW_FORM_strp in the debug info, is that +DWARF parsing can be made much faster. + +After a lookup is made, we get an offset into the hash data. The hash data +needs to be able to deal with 32 bit hash collisions, so the chunk of data +at the offset in the hash data consists of a triple: + +.. code-block:: c + + uint32_t str_offset + uint32_t hash_data_count + HashData[hash_data_count] + +If "str_offset" is zero, then the bucket contents are done. 99.9% of the +hash data chunks contain a single item (no 32 bit hash collision): + +.. code-block:: none + + .------------. 
+ | 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main") + | 0x00000004 | uint32_t HashData count + | 0x........ | uint32_t HashData[0] DIE offset + | 0x........ | uint32_t HashData[1] DIE offset + | 0x........ | uint32_t HashData[2] DIE offset + | 0x........ | uint32_t HashData[3] DIE offset + | 0x00000000 | uint32_t KeyType (end of hash chain) + `------------' + +If there are collisions, you will have multiple valid string offsets: + +.. code-block:: none + + .------------. + | 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main") + | 0x00000004 | uint32_t HashData count + | 0x........ | uint32_t HashData[0] DIE offset + | 0x........ | uint32_t HashData[1] DIE offset + | 0x........ | uint32_t HashData[2] DIE offset + | 0x........ | uint32_t HashData[3] DIE offset + | 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print") + | 0x00000002 | uint32_t HashData count + | 0x........ | uint32_t HashData[0] DIE offset + | 0x........ | uint32_t HashData[1] DIE offset + | 0x00000000 | uint32_t KeyType (end of hash chain) + `------------' + +Current testing with real world C++ binaries has shown that there is around 1 +32 bit hash collision per 100,000 name entries. + +Contents +^^^^^^^^ + +As we said, we want to strictly define exactly what is included in the +different tables. For DWARF, we have 3 tables: "``.apple_names``", +"``.apple_types``", and "``.apple_namespaces``". + +"``.apple_names``" sections should contain an entry for each DWARF DIE whose +``DW_TAG`` is a ``DW_TAG_label``, ``DW_TAG_inlined_subroutine``, or +``DW_TAG_subprogram`` that has address attributes: ``DW_AT_low_pc``, +``DW_AT_high_pc``, ``DW_AT_ranges`` or ``DW_AT_entry_pc``. It also contains +``DW_TAG_variable`` DIEs that have a ``DW_OP_addr`` in the location (global and +static variables). All global and static variables should be included, +including those scoped within functions and classes. For example using the +following code: + +.. 
code-block:: c + + static int var = 0; + + void f () + { + static int var = 0; + } + +Both of the static ``var`` variables would be included in the table. All +functions should emit both their full names and their basenames. For C or C++, +the full name is the mangled name (if available) which is usually in the +``DW_AT_MIPS_linkage_name`` attribute, and the ``DW_AT_name`` contains the +function basename. If global or static variables have a mangled name in a +``DW_AT_MIPS_linkage_name`` attribute, this should be emitted along with the +simple name found in the ``DW_AT_name`` attribute. + +"``.apple_types``" sections should contain an entry for each DWARF DIE whose +tag is one of: + +* DW_TAG_array_type +* DW_TAG_class_type +* DW_TAG_enumeration_type +* DW_TAG_pointer_type +* DW_TAG_reference_type +* DW_TAG_string_type +* DW_TAG_structure_type +* DW_TAG_subroutine_type +* DW_TAG_typedef +* DW_TAG_union_type +* DW_TAG_ptr_to_member_type +* DW_TAG_set_type +* DW_TAG_subrange_type +* DW_TAG_base_type +* DW_TAG_const_type +* DW_TAG_constant +* DW_TAG_file_type +* DW_TAG_namelist +* DW_TAG_packed_type +* DW_TAG_volatile_type +* DW_TAG_restrict_type +* DW_TAG_interface_type +* DW_TAG_unspecified_type +* DW_TAG_shared_type + +Only entries with a ``DW_AT_name`` attribute are included, and the entry must +not be a forward declaration (``DW_AT_declaration`` attribute with a non-zero +value). For example, using the following code: + +.. code-block:: c + + int main () + { + int *b = 0; + return *b; + } + +We get a few type DIEs: + +.. code-block:: none + + 0x00000067: TAG_base_type [5] + AT_encoding( DW_ATE_signed ) + AT_name( "int" ) + AT_byte_size( 0x04 ) + + 0x0000006e: TAG_pointer_type [6] + AT_type( {0x00000067} ( int ) ) + AT_byte_size( 0x08 ) + +The DW_TAG_pointer_type is not included because it does not have a ``DW_AT_name``. + +"``.apple_namespaces``" section should contain all ``DW_TAG_namespace`` DIEs. 
+If we run into a namespace that has no name this is an anonymous namespace, and +the name should be output as "``(anonymous namespace)``" (without the quotes). +Why? This matches the output of the ``abi::__cxa_demangle()`` that is in the +standard C++ library that demangles mangled names. + + +Language Extensions and File Format Changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Objective-C Extensions +"""""""""""""""""""""" + +"``.apple_objc``" section should contain all ``DW_TAG_subprogram`` DIEs for an +Objective-C class. The name used in the hash table is the name of the +Objective-C class itself. If the Objective-C class has a category, then an +entry is made for both the class name without the category, and for the class +name with the category. So if we have a DIE at offset 0x1234 with a name of +method "``-[NSString(my_additions) stringWithSpecialString:]``", we would add +an entry for "``NSString``" that points to DIE 0x1234, and an entry for +"``NSString(my_additions)``" that points to 0x1234. This allows us to quickly +track down all Objective-C methods for an Objective-C class when doing +expressions. It is needed because of the dynamic nature of Objective-C where +anyone can add methods to a class. The DWARF for Objective-C methods is also +emitted differently from C++ classes where the methods are not usually +contained in the class definition, they are scattered about across one or more +compile units. Categories can also be defined in different shared libraries. +So we need to be able to quickly find all of the methods and class functions +given the Objective-C class name, or quickly find all methods and class +functions for a class + category name. This table does not contain any +selector names, it just maps Objective-C class names (or class names + +category) to all of the methods and class functions. The selectors are added +as function basenames in the "``.apple_names``" section.
+ +In the "``.apple_names``" section for Objective-C functions, the full name is +the entire function name with the brackets ("``-[NSString +stringWithCString:]``") and the basename is the selector only +("``stringWithCString:``"). + +Mach-O Changes +"""""""""""""" + +The section names for the apple hash tables are for non mach-o files. For +mach-o files, the sections should be contained in the ``__DWARF`` segment with +names as follows: + +* "``.apple_names``" -> "``__apple_names``" +* "``.apple_types``" -> "``__apple_types``" +* "``.apple_namespaces``" -> "``__apple_namespac``" (16 character limit) +* "``.apple_objc``" -> "``__apple_objc``" + diff --git a/docs/SphinxQuickstartTemplate.rst b/docs/SphinxQuickstartTemplate.rst index 75d916368e33..fe6e44a27cea 100644 --- a/docs/SphinxQuickstartTemplate.rst +++ b/docs/SphinxQuickstartTemplate.rst @@ -2,8 +2,6 @@ Sphinx Quickstart Template ========================== -.. sectionauthor:: Sean Silva - Introduction and Quickstart =========================== @@ -24,7 +22,8 @@ reStructuredText syntax is useful when writing the document, so the last ~half of this document (starting with `Example Section`_) gives examples which should cover 99% of use cases. -Let me say that again: focus on *content*. +Let me say that again: focus on *content*. But if you really need to verify +Sphinx's output, see ``docs/README.txt`` for information. Once you have finished with the content, please send the ``.rst`` file to llvm-commits for review. @@ -65,7 +64,7 @@ Your text can be *emphasized*, **bold**, or ``monospace``. Use blank lines to separate paragraphs. -Headings (like ``Example Section`` just above) give your document +Headings (like ``Example Section`` just above) give your document its structure. Use the same kind of adornments (e.g. ``======`` vs. ``------``) as are used in this document. The adornment must be the same length as the text above it. For Vim users, variations of ``yypVr=`` might be handy.
@@ -86,7 +85,7 @@ Lists can be made like this: #. This is a second list element. - #. They nest too. + #. Use indentation to create nested lists. You can also use unordered lists. @@ -104,18 +103,54 @@ You can make blocks of code like this: .. code-block:: c++ int main() { - return 0 + return 0; } -For a shell session, use a ``bash`` code block: +For a shell session, use a ``console`` code block (some existing docs use +``bash``): -.. code-block:: bash +.. code-block:: console $ echo "Goodbye cruel world!" $ rm -rf / If you need to show LLVM IR use the ``llvm`` code block. +.. code-block:: llvm + + define i32 @test1() { + entry: + ret i32 0 + } + +Some other common code blocks you might need are ``c``, ``objc``, ``make``, +and ``cmake``. If you need something beyond that, you can look at the `full +list`_ of supported code blocks. + +.. _`full list`: http://pygments.org/docs/lexers/ + +However, don't waste time fiddling with syntax highlighting when you could +be adding meaningful content. When in doubt, show preformatted text +without any syntax highlighting like this: + +:: + + . + +:. + ..:: :: + .++:+:: ::+:.:. + .:+ : + ::.::..:: .+. + ..:+ :: : + ......+:. .. + :++. .. : + .+:::+:: : + .. . .+ :: + +.: .::+. + ...+. .: . + .++:.. + ... + Hopefully you won't need to be this deep """""""""""""""""""""""""""""""""""""""" diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html deleted file mode 100644 index 4b09e7cba13d..000000000000 --- a/docs/SystemLibrary.html +++ /dev/null @@ -1,316 +0,0 @@ - - - - - System Library - - - - -

System Library

-
- -
-

Written by Reid Spencer

-
- - - -

Abstract

-
-

This document provides some details on LLVM's System Library, located in - the source at lib/System and include/llvm/System. The - library's purpose is to shield LLVM from the differences between operating - systems for the few services LLVM needs from the operating system. Much of - LLVM is written using portability features of standard C++. However, in a few - areas, system dependent facilities are needed and the System Library is the - wrapper around those system calls.

-

By centralizing LLVM's use of operating system interfaces, we make it - possible for the LLVM tool chain and runtime libraries to be more easily - ported to new platforms since (theoretically) only lib/System needs - to be ported. This library also unclutters the rest of LLVM from #ifdef use - and special cases for specific operating systems. Such uses are replaced - with simple calls to the interfaces provided in include/llvm/System. -

-

Note that the System Library is not intended to be a complete operating - system wrapper (such as the Adaptive Communications Environment (ACE) or - Apache Portable Runtime (APR)), but only provides the functionality necessary - to support LLVM. -

The System Library was written by Reid Spencer who formulated the - design based on similar work originating from the eXtensible Programming - System (XPS). Several people helped with the effort; especially, - Jeff Cohen and Henrik Bach on the Win32 port.

-
- - -

- Keeping LLVM Portable -

-
-

In order to keep LLVM portable, LLVM developers should adhere to a set of - portability rules associated with the System Library. Adherence to these rules - should help the System Library achieve its goal of shielding LLVM from the - variations in operating system interfaces and doing so efficiently. The - following sections define the rules needed to fulfill this objective.

- - -

Don't Include System Headers

-
-

Except in lib/System, no LLVM source code should directly - #include a system header. Care has been taken to remove all such - #includes from LLVM while lib/System was being - developed. Specifically this means that header files like "unistd.h", - "windows.h", "stdio.h", and "string.h" are forbidden to be included by LLVM - source code outside the implementation of lib/System.

-

To obtain system-dependent functionality, existing interfaces to the system - found in include/llvm/System should be used. If an appropriate - interface is not available, it should be added to include/llvm/System - and implemented in lib/System for all supported platforms.

-
- - -

Don't Expose System Headers

-
-

The System Library must shield LLVM from all system headers. To - obtain system level functionality, LLVM source must - #include "llvm/System/Thing.h" and nothing else. This means that - Thing.h cannot expose any system header files. This protects LLVM - from accidentally using system specific functionality and only allows it - via the lib/System interface.

-
- - -

Use Standard C Headers

-
-

The standard C headers (the ones beginning with "c") are allowed - to be exposed through the lib/System interface. These headers and - the things they declare are considered to be platform agnostic. LLVM source - files may include them directly or obtain their inclusion through - lib/System interfaces.

-
- - -

Use Standard C++ Headers

-
-

The standard C++ headers from the standard C++ library and - standard template library may be exposed through the lib/System - interface. These headers and the things they declare are considered to be - platform agnostic. LLVM source files may include them or obtain their - inclusion through lib/System interfaces.

-
- - -

High Level Interface

-
-

The entry points specified in the interface of lib/System must be aimed at - completing some reasonably high level task needed by LLVM. We do not want to - simply wrap each operating system call. It would be preferable to wrap several - operating system calls that are always used in conjunction with one another by - LLVM.

-

For example, consider what is needed to execute a program, wait for it to - complete, and return its result code. On Unix, this involves the following - operating system calls: getenv, fork, execve, and wait. The - correct thing for lib/System to provide is a function, say - ExecuteProgramAndWait, that implements the functionality completely. - what we don't want is wrappers for the operating system calls involved.

-

There must not be a one-to-one relationship between operating - system calls and the System library's interface. Any such interface function - will be suspicious.

-
- - -

No Unused Functionality

-
-

There must be no functionality specified in the interface of lib/System - that isn't actually used by LLVM. We're not writing a general purpose - operating system wrapper here, just enough to satisfy LLVM's needs. And, LLVM - doesn't need much. This design goal aims to keep the lib/System interface - small and understandable which should foster its actual use and adoption.

-
- - -

No Duplicate Implementations

-
-

The implementation of a function for a given platform must be written - exactly once. This implies that it must be possible to apply a function's - implementation to multiple operating systems if those operating systems can - share the same implementation. This rule applies to the set of operating - systems supported for a given class of operating system (e.g. Unix, Win32). -

-
- - -

No Virtual Methods

-
-

The System Library interfaces can be called quite frequently by LLVM. In - order to make those calls as efficient as possible, we discourage the use of - virtual methods. There is no need to use inheritance for implementation - differences, it just adds complexity. The #include mechanism works - just fine.

-
- - -

No Exposed Functions

-
-

Any functions defined by system libraries (i.e. not defined by lib/System) - must not be exposed through the lib/System interface, even if the header file - for that function is not exposed. This prevents inadvertent use of system - specific functionality.

-

For example, the stat system call is notorious for having - variations in the data it provides. lib/System must not declare - stat nor allow it to be declared. Instead it should provide its own - interface to discovering information about files and directories. Those - interfaces may be implemented in terms of stat but that is strictly - an implementation detail. The interface provided by the System Library must - be implemented on all platforms (even those without stat).

-
- - -

No Exposed Data

-
-

Any data defined by system libraries (i.e. not defined by lib/System) must - not be exposed through the lib/System interface, even if the header file for - that function is not exposed. As with functions, this prevents inadvertent use - of data that might not exist on all platforms.

-
- - -

Minimize Soft Errors

-
-

Operating system interfaces will generally provide error results for every - little thing that could go wrong. In almost all cases, you can divide these - error results into two groups: normal/good/soft and abnormal/bad/hard. That - is, some of the errors are simply information like "file not found", - "insufficient privileges", etc. while other errors are much harder like - "out of space", "bad disk sector", or "system call interrupted". We'll call - the first group "soft" errors and the second group "hard" - errors.

-

lib/System must always attempt to minimize soft errors. - This is a design requirement because the - minimization of soft errors can affect the granularity and the nature of the - interface. In general, if you find that you're wanting to throw soft errors, - you must review the granularity of the interface because it is likely you're - trying to implement something that is too low level. The rule of thumb is to - provide interface functions that can't fail, except when faced with - hard errors.

-

For a trivial example, suppose we wanted to add an "OpenFileForWriting" - function. For many operating systems, if the file doesn't exist, attempting - to open the file will produce an error. However, lib/System should not - simply throw that error if it occurs because its a soft error. The problem - is that the interface function, OpenFileForWriting is too low level. It should - be OpenOrCreateFileForWriting. In the case of the soft "doesn't exist" error, - this function would just create it and then open it for writing.

-

This design principle needs to be maintained in lib/System because it - avoids the propagation of soft error handling throughout the rest of LLVM. - Hard errors will generally just cause a termination for an LLVM tool so don't - be bashful about throwing them.

-

Rules of thumb:

-
    -
  1. Don't throw soft errors, only hard errors.
  2. -
  3. If you're tempted to throw a soft error, re-think the interface.
  4. -
  5. Handle internally the most common normal/good/soft error conditions - so the rest of LLVM doesn't have to.
  6. -
-
- - -

No throw Specifications

-
-

None of the lib/System interface functions may be declared with C++ - throw() specifications on them. This requirement makes sure that the - compiler does not insert additional exception handling code into the interface - functions. This is a performance consideration: lib/System functions are at - the bottom of many call chains and as such can be frequently called. We - need them to be as efficient as possible. However, no routines in the - system library should actually throw exceptions.

-
- - -

Code Organization

-
-

Implementations of the System Library interface are separated by their - general class of operating system. Currently only Unix and Win32 classes are - defined but more could be added for other operating system classifications. - To distinguish which implementation to compile, the code in lib/System uses - the LLVM_ON_UNIX and LLVM_ON_WIN32 #defines provided via configure through the - llvm/Config/config.h file. Each source file in lib/System, after implementing - the generic (operating system independent) functionality needs to include the - correct implementation using a set of #if defined(LLVM_ON_XYZ) - directives. For example, if we had lib/System/File.cpp, we'd expect to see in - that file:

-

-  #if defined(LLVM_ON_UNIX)
-  #include "Unix/File.cpp"
-  #endif
-  #if defined(LLVM_ON_WIN32)
-  #include "Win32/File.cpp"
-  #endif
-  
-

The implementation in lib/System/Unix/File.cpp should handle all Unix - variants. The implementation in lib/System/Win32/File.cpp should handle all - Win32 variants. What this does is quickly differentiate the basic class of - operating system that will provide the implementation. The specific details - for a given platform must still be determined through the use of - #ifdef.

-
- - -

Consistent Semantics

-
-

The implementation of a lib/System interface can vary drastically between - platforms. That's okay as long as the end result of the interface function - is the same. For example, a function to create a directory is pretty straight - forward on all operating system. System V IPC on the other hand isn't even - supported on all platforms. Instead of "supporting" System V IPC, lib/System - should provide an interface to the basic concept of inter-process - communications. The implementations might use System V IPC if that was - available or named pipes, or whatever gets the job done effectively for a - given operating system. In all cases, the interface and the implementation - must be semantically consistent.

-
- - -

Bug 351

-
-

See bug 351 - for further details on the progress of this work

-
- -
- - - -
-
- Valid CSS - Valid HTML 4.01 - - Reid Spencer
- LLVM Compiler Infrastructure
- Last modified: $Date: 2012-04-19 22:20:34 +0200 (Thu, 19 Apr 2012) $ -
- - diff --git a/docs/SystemLibrary.rst b/docs/SystemLibrary.rst new file mode 100644 index 000000000000..0d0f4fa99482 --- /dev/null +++ b/docs/SystemLibrary.rst @@ -0,0 +1,247 @@ +============== +System Library +============== + +Abstract +======== + +This document provides some details on LLVM's System Library, located in the +source at ``lib/System`` and ``include/llvm/System``. The library's purpose is +to shield LLVM from the differences between operating systems for the few +services LLVM needs from the operating system. Much of LLVM is written using +portability features of standard C++. However, in a few areas, system dependent +facilities are needed and the System Library is the wrapper around those system +calls. + +By centralizing LLVM's use of operating system interfaces, we make it possible +for the LLVM tool chain and runtime libraries to be more easily ported to new +platforms since (theoretically) only ``lib/System`` needs to be ported. This +library also unclutters the rest of LLVM from #ifdef use and special cases for +specific operating systems. Such uses are replaced with simple calls to the +interfaces provided in ``include/llvm/System``. + +Note that the System Library is not intended to be a complete operating system +wrapper (such as the Adaptive Communications Environment (ACE) or Apache +Portable Runtime (APR)), but only provides the functionality necessary to +support LLVM. + +The System Library was written by Reid Spencer who formulated the design based +on similar work originating from the eXtensible Programming System (XPS). +Several people helped with the effort; especially, Jeff Cohen and Henrik Bach +on the Win32 port. + +Keeping LLVM Portable +===================== + +In order to keep LLVM portable, LLVM developers should adhere to a set of +portability rules associated with the System Library. 
Adherence to these rules +should help the System Library achieve its goal of shielding LLVM from the +variations in operating system interfaces and doing so efficiently. The +following sections define the rules needed to fulfill this objective. + +Don't Include System Headers +---------------------------- + +Except in ``lib/System``, no LLVM source code should directly ``#include`` a +system header. Care has been taken to remove all such ``#includes`` from LLVM +while ``lib/System`` was being developed. Specifically this means that header +files like "``unistd.h``", "``windows.h``", "``stdio.h``", and "``string.h``" +are forbidden to be included by LLVM source code outside the implementation of +``lib/System``. + +To obtain system-dependent functionality, existing interfaces to the system +found in ``include/llvm/System`` should be used. If an appropriate interface is +not available, it should be added to ``include/llvm/System`` and implemented in +``lib/System`` for all supported platforms. + +Don't Expose System Headers +--------------------------- + +The System Library must shield LLVM from **all** system headers. To obtain +system level functionality, LLVM source must ``#include "llvm/System/Thing.h"`` +and nothing else. This means that ``Thing.h`` cannot expose any system header +files. This protects LLVM from accidentally using system specific functionality +and only allows it via the ``lib/System`` interface. + +Use Standard C Headers +---------------------- + +The **standard** C headers (the ones beginning with "c") are allowed to be +exposed through the ``lib/System`` interface. These headers and the things they +declare are considered to be platform agnostic. LLVM source files may include +them directly or obtain their inclusion through ``lib/System`` interfaces. 
+ +Use Standard C++ Headers +------------------------ + +The **standard** C++ headers from the standard C++ library and standard +template library may be exposed through the ``lib/System`` interface. These +headers and the things they declare are considered to be platform agnostic. +LLVM source files may include them or obtain their inclusion through +``lib/System`` interfaces. + +High Level Interface +-------------------- + +The entry points specified in the interface of ``lib/System`` must be aimed at +completing some reasonably high level task needed by LLVM. We do not want to +simply wrap each operating system call. It would be preferable to wrap several +operating system calls that are always used in conjunction with one another by +LLVM. + +For example, consider what is needed to execute a program, wait for it to +complete, and return its result code. On Unix, this involves the following +operating system calls: ``getenv``, ``fork``, ``execve``, and ``wait``. The +correct thing for ``lib/System`` to provide is a function, say +``ExecuteProgramAndWait``, that implements the functionality completely. What +we don't want is wrappers for the operating system calls involved. + +There must **not** be a one-to-one relationship between operating system +calls and the System library's interface. Any such interface function will be +suspicious. + +No Unused Functionality +----------------------- + +There must be no functionality specified in the interface of ``lib/System`` +that isn't actually used by LLVM. We're not writing a general purpose operating +system wrapper here, just enough to satisfy LLVM's needs. And, LLVM doesn't +need much. This design goal aims to keep the ``lib/System`` interface small and +understandable which should foster its actual use and adoption. + +No Duplicate Implementations +---------------------------- + +The implementation of a function for a given platform must be written exactly +once.
This implies that it must be possible to apply a function's +implementation to multiple operating systems if those operating systems can +share the same implementation. This rule applies to the set of operating +systems supported for a given class of operating system (e.g. Unix, Win32). + +No Virtual Methods +------------------ + +The System Library interfaces can be called quite frequently by LLVM. In order +to make those calls as efficient as possible, we discourage the use of virtual +methods. There is no need to use inheritance for implementation differences, it +just adds complexity. The ``#include`` mechanism works just fine. + +No Exposed Functions +-------------------- + +Any functions defined by system libraries (i.e. not defined by ``lib/System``) +must not be exposed through the ``lib/System`` interface, even if the header +file for that function is not exposed. This prevents inadvertent use of system +specific functionality. + +For example, the ``stat`` system call is notorious for having variations in the +data it provides. ``lib/System`` must not declare ``stat`` nor allow it to be +declared. Instead it should provide its own interface to discovering +information about files and directories. Those interfaces may be implemented in +terms of ``stat`` but that is strictly an implementation detail. The interface +provided by the System Library must be implemented on all platforms (even those +without ``stat``). + +No Exposed Data +--------------- + +Any data defined by system libraries (i.e. not defined by ``lib/System``) must +not be exposed through the ``lib/System`` interface, even if the header file +for that function is not exposed. As with functions, this prevents inadvertent +use of data that might not exist on all platforms. + +Minimize Soft Errors +-------------------- + +Operating system interfaces will generally provide error results for every +little thing that could go wrong. 
In almost all cases, you can divide these +error results into two groups: normal/good/soft and abnormal/bad/hard. That is, +some of the errors are simply information like "file not found", "insufficient +privileges", etc. while other errors are much harder like "out of space", "bad +disk sector", or "system call interrupted". We'll call the first group "*soft*" +errors and the second group "*hard*" errors. + +``lib/System`` must always attempt to minimize soft errors. This is a design +requirement because the minimization of soft errors can affect the granularity +and the nature of the interface. In general, if you find that you're wanting to +throw soft errors, you must review the granularity of the interface because it +is likely you're trying to implement something that is too low level. The rule +of thumb is to provide interface functions that **can't** fail, except when +faced with hard errors. + +For a trivial example, suppose we wanted to add an "``OpenFileForWriting``" +function. For many operating systems, if the file doesn't exist, attempting to +open the file will produce an error. However, ``lib/System`` should not simply +throw that error if it occurs because it's a soft error. The problem is that the +interface function, ``OpenFileForWriting``, is too low level. It should be +``OpenOrCreateFileForWriting``. In the case of the soft "doesn't exist" error, +this function would just create it and then open it for writing. + +This design principle needs to be maintained in ``lib/System`` because it +avoids the propagation of soft error handling throughout the rest of LLVM. +Hard errors will generally just cause a termination for an LLVM tool so don't +be bashful about throwing them. + +Rules of thumb: + +#. Don't throw soft errors, only hard errors. + +#. If you're tempted to throw a soft error, re-think the interface. + +#. Handle internally the most common normal/good/soft error conditions + so the rest of LLVM doesn't have to.
+ +No throw Specifications +----------------------- + +None of the ``lib/System`` interface functions may be declared with C++ +``throw()`` specifications on them. This requirement makes sure that the +compiler does not insert additional exception handling code into the interface +functions. This is a performance consideration: ``lib/System`` functions are at +the bottom of many call chains and as such can be frequently called. We need +them to be as efficient as possible. However, no routines in the system +library should actually throw exceptions. + +Code Organization +----------------- + +Implementations of the System Library interface are separated by their general +class of operating system. Currently only Unix and Win32 classes are defined +but more could be added for other operating system classifications. To +distinguish which implementation to compile, the code in ``lib/System`` uses +the ``LLVM_ON_UNIX`` and ``LLVM_ON_WIN32`` ``#defines`` provided via configure +through the ``llvm/Config/config.h`` file. Each source file in ``lib/System``, +after implementing the generic (operating system independent) functionality +needs to include the correct implementation using a set of +``#if defined(LLVM_ON_XYZ)`` directives. For example, if we had +``lib/System/File.cpp``, we'd expect to see in that file: + +.. code-block:: c++ + + #if defined(LLVM_ON_UNIX) + #include "Unix/File.cpp" + #endif + #if defined(LLVM_ON_WIN32) + #include "Win32/File.cpp" + #endif + +The implementation in ``lib/System/Unix/File.cpp`` should handle all Unix +variants. The implementation in ``lib/System/Win32/File.cpp`` should handle all +Win32 variants. What this does is quickly differentiate the basic class of +operating system that will provide the implementation. The specific details for +a given platform must still be determined through the use of ``#ifdef``. 
+ +Consistent Semantics +-------------------- + +The implementation of a ``lib/System`` interface can vary drastically between +platforms. That's okay as long as the end result of the interface function is +the same. For example, a function to create a directory is pretty straight +forward on all operating systems. System V IPC on the other hand isn't even +supported on all platforms. Instead of "supporting" System V IPC, +``lib/System`` should provide an interface to the basic concept of +inter-process communications. The implementations might use System V IPC if +that was available or named pipes, or whatever gets the job done effectively +for a given operating system. In all cases, the interface and the +implementation must be semantically consistent. + diff --git a/docs/TableGen/LangRef.rst b/docs/TableGen/LangRef.rst new file mode 100644 index 000000000000..bd28a9031d74 --- /dev/null +++ b/docs/TableGen/LangRef.rst @@ -0,0 +1,383 @@ +=========================== +TableGen Language Reference +=========================== + +.. sectionauthor:: Sean Silva + +.. contents:: + :local: + +.. warning:: + This document is extremely rough. If you find something lacking, please + fix it, file a documentation bug, or ask about it on llvmdev. + +Introduction +============ + +This document is meant to be a normative spec about the TableGen language +in and of itself (i.e. how to understand a given construct in terms of how +it affects the final set of records represented by the TableGen file). If +you are unsure if this document is really what you are looking for, please +read :doc:`/TableGenFundamentals` first. + +Notation +======== + +The lexical and syntax notation used here is intended to imitate +`Python's`_. In particular, for lexical definitions, the productions +operate at the character level and there is no implied whitespace between +elements. The syntax definitions operate at the token level, so there is +implied whitespace between tokens. + +..
_`Python's`: http://docs.python.org/py3k/reference/introduction.html#notation + +Lexical Analysis +================ + +TableGen supports BCPL (``// ...``) and nestable C-style (``/* ... */``) +comments. + +The following is a listing of the basic punctuation tokens:: + + - + [ ] { } ( ) < > : ; . = ? # + +Numeric literals take one of the following forms: + +.. TableGen actually will lex some pretty strange sequences and interpret + them as numbers. What is shown here is an attempt to approximate what it + "should" accept. + +.. productionlist:: + TokInteger: `DecimalInteger` | `HexInteger` | `BinInteger` + DecimalInteger: ["+" | "-"] ("0"..."9")+ + HexInteger: "0x" ("0"..."9" | "a"..."f" | "A"..."F")+ + BinInteger: "0b" ("0" | "1")+ + +One aspect to note is that the :token:`DecimalInteger` token *includes* the +``+`` or ``-``, as opposed to having ``+`` and ``-`` be unary operators as +most languages do. + +TableGen has identifier-like tokens: + +.. productionlist:: + ualpha: "a"..."z" | "A"..."Z" | "_" + TokIdentifier: ("0"..."9")* `ualpha` (`ualpha` | "0"..."9")* + TokVarName: "$" `ualpha` (`ualpha` | "0"..."9")* + +Note that unlike most languages, TableGen allows :token:`TokIdentifier` to +begin with a number. In case of ambiguity, a token will be interpreted as a +numeric literal rather than an identifier. + +TableGen also has two string-like literals: + +.. productionlist:: + TokString: '"' <non-'"' characters and C-like escapes> '"' + TokCodeFragment: "[{" <shortest text not containing "}]"> "}]" + +.. note:: + The current implementation accepts the following C-like escapes:: + + \\ \' \" \t \n + +TableGen also has the following keywords:: + + bit bits class code dag + def foreach defm field in + int let list multiclass string + +TableGen also has "bang operators" which have a +wide variety of meanings: + +.. productionlist:: + BangOperator: one of + :!eq !if !head !tail !con + :!add !shl !sra !srl + :!cast !empty !subst !foreach !strconcat + +Syntax +====== + +TableGen has an ``include`` mechanism.
It does not play a role in the +syntax per se, since it is lexically replaced with the contents of the +included file. + +.. productionlist:: + IncludeDirective: "include" `TokString` + +TableGen's top-level production consists of "objects". + +.. productionlist:: + TableGenFile: `Object`* + Object: `Class` | `Def` | `Defm` | `Let` | `MultiClass` | `Foreach` + +``class``\es +------------ + +.. productionlist:: + Class: "class" `TokIdentifier` [`TemplateArgList`] `ObjectBody` + +A ``class`` declaration creates a record which other records can inherit +from. A class can be parametrized by a list of "template arguments", whose +values can be used in the class body. + +A given class can only be defined once. A ``class`` declaration is +considered to define the class if any of the following is true: + +.. break ObjectBody into its constituents so that they are present here? + +#. The :token:`TemplateArgList` is present. +#. The :token:`Body` in the :token:`ObjectBody` is present and is not empty. +#. The :token:`BaseClassList` in the :token:`ObjectBody` is present. + +You can declare an empty class by giving an empty :token:`TemplateArgList` +and an empty :token:`ObjectBody`. This can serve as a restricted form of +forward declaration: note that records deriving from the forward-declared +class will inherit no fields from it since the record expansion is done +when the record is parsed. + +.. productionlist:: + TemplateArgList: "<" `Declaration` ("," `Declaration`)* ">" + +Declarations +------------ + +.. Omitting mention of arcane "field" prefix to discourage its use. + +The declaration syntax is pretty much what you would expect as a C++ +programmer. + +.. productionlist:: + Declaration: `Type` `TokIdentifier` ["=" `Value`] + +It assigns the value to the identifier. + +Types +----- + +..
productionlist:: + Type: "string" | "code" | "bit" | "int" | "dag" + :| "bits" "<" `TokInteger` ">" + :| "list" "<" `Type` ">" + :| `ClassID` + ClassID: `TokIdentifier` + +Both ``string`` and ``code`` correspond to the string type; the difference +is purely to indicate programmer intention. + +The :token:`ClassID` must identify a class that has been previously +declared or defined. + +Values +------ + +.. productionlist:: + Value: `SimpleValue` `ValueSuffix`* + ValueSuffix: "{" `RangeList` "}" + :| "[" `RangeList` "]" + :| "." `TokIdentifier` + RangeList: `RangePiece` ("," `RangePiece`)* + RangePiece: `TokInteger` + :| `TokInteger` "-" `TokInteger` + :| `TokInteger` `TokInteger` + +The peculiar last form of :token:`RangePiece` is due to the fact that the +"``-``" is included in the :token:`TokInteger`, hence ``1-5`` gets lexed as +two consecutive :token:`TokInteger`'s, with values ``1`` and ``-5``, +instead of "1", "-", and "5". +The :token:`RangeList` can be thought of as specifying "list slice" in some +contexts. + + +:token:`SimpleValue` has a number of forms: + + +.. productionlist:: + SimpleValue: `TokIdentifier` + +The value will be the variable referenced by the identifier. It can be one +of: + +.. The code for this is exceptionally abstruse. These examples are a + best-effort attempt. + +* name of a ``def``, such as the use of ``Bar`` in:: + + def Bar : SomeClass { + int X = 5; + } + + def Foo { + SomeClass Baz = Bar; + } + +* value local to a ``def``, such as the use of ``Bar`` in:: + + def Foo { + int Bar = 5; + int Baz = Bar; + } + +* a template arg of a ``class``, such as the use of ``Bar`` in:: + + class Foo<int Bar> { + int Baz = Bar; + } + +* value local to a ``multiclass``, such as the use of ``Bar`` in:: + + multiclass Foo { + int Bar = 5; + int Baz = Bar; + } + +* a template arg to a ``multiclass``, such as the use of ``Bar`` in:: + + multiclass Foo<int Bar> { + int Baz = Bar; + } + +..
productionlist:: + SimpleValue: `TokInteger` + +This represents the numeric value of the integer. + +.. productionlist:: + SimpleValue: `TokString`+ + +Multiple adjacent string literals are concatenated like in C/C++. The value +is the concatenation of the strings. + +.. productionlist:: + SimpleValue: `TokCodeFragment` + +The value is the string value of the code fragment. + +.. productionlist:: + SimpleValue: "?" + +``?`` represents an "unset" initializer. + +.. productionlist:: + SimpleValue: "{" `ValueList` "}" + ValueList: [`ValueListNE`] + ValueListNE: `Value` ("," `Value`)* + +This represents a sequence of bits, as would be used to initialize a +``bits<n>`` field (where ``n`` is the number of bits). + +.. productionlist:: + SimpleValue: `ClassID` "<" `ValueListNE` ">" + +This generates a new anonymous record definition (as would be created by an +unnamed ``def`` inheriting from the given class with the given template +arguments) and the value is the value of that record definition. + +.. productionlist:: + SimpleValue: "[" `ValueList` "]" ["<" `Type` ">"] + +A list initializer. The optional :token:`Type` can be used to indicate a +specific element type, otherwise the element type will be deduced from the +given values. + +.. The initial `DagArg` of the dag must start with an identifier or + !cast, but this is more of an implementation detail and so for now just + leave it out. + +.. productionlist:: + SimpleValue: "(" `DagArg` `DagArgList` ")" + DagArgList: `DagArg` ("," `DagArg`)* + DagArg: `Value` [":" `TokVarName`] | `TokVarName` + +The initial :token:`DagArg` is called the "operator" of the dag. + +.. productionlist:: + SimpleValue: `BangOperator` ["<" `Type` ">"] "(" `ValueListNE` ")" + +Bodies +------ + +..
productionlist:: + ObjectBody: `BaseClassList` `Body` + BaseClassList: [":" `BaseClassListNE`] + BaseClassListNE: `SubClassRef` ("," `SubClassRef`)* + SubClassRef: (`ClassID` | `MultiClassID`) ["<" `ValueList` ">"] + DefmID: `TokIdentifier` + +The version with the :token:`MultiClassID` is only valid in the +:token:`BaseClassList` of a ``defm``. +The :token:`MultiClassID` should be the name of a ``multiclass``. + +.. put this somewhere else + +It is after parsing the base class list that the "let stack" is applied. + +.. productionlist:: + Body: ";" | "{" BodyList "}" + BodyList: BodyItem* + BodyItem: `Declaration` ";" + :| "let" `TokIdentifier` [`RangeList`] "=" `Value` ";" + +The ``let`` form allows overriding the value of an inherited field. + +``def`` +------- + +.. TODO:: + There can be pastes in the names here, like ``#NAME#``. Look into that + and document it (it boils down to ParseIDValue with IDParseMode == + ParseNameMode). ParseObjectName calls into the general ParseValue, with + the only difference from "arbitrary expression parsing" being IDParseMode + == Mode. + +.. productionlist:: + Def: "def" `TokIdentifier` `ObjectBody` + +Defines a record whose name is given by the :token:`TokIdentifier`. The +fields of the record are inherited from the base classes and defined in the +body. + +Special handling occurs if this ``def`` appears inside a ``multiclass`` or +a ``foreach``. + +``defm`` +-------- + +.. productionlist:: + Defm: "defm" `TokIdentifier` ":" `BaseClassListNE` ";" + +Note that in the :token:`BaseClassList`, all of the ``multiclass``'s must +precede any ``class``'s that appear. + +``foreach`` +----------- + +.. productionlist:: + Foreach: "foreach" `Declaration` "in" "{" `Object`* "}" + :| "foreach" `Declaration` "in" `Object` + +The value assigned to the variable in the declaration is iterated over and +the object or object list is reevaluated with the variable set at each +iterated value. + +Top-Level ``let`` +----------------- + +..
productionlist:: + Let: "let" `LetList` "in" "{" `Object`* "}" + :| "let" `LetList` "in" `Object` + LetList: `LetItem` ("," `LetItem`)* + LetItem: `TokIdentifier` [`RangeList`] "=" `Value` + +This is effectively equivalent to ``let`` inside the body of a record +except that it applies to multiple records at a time. The bindings are +applied at the end of parsing the base classes of a record. + +``multiclass`` +-------------- + +.. productionlist:: + MultiClass: "multiclass" `TokIdentifier` [`TemplateArgList`] + : [":" `BaseMultiClassList`] "{" `MultiClassObject`+ "}" + BaseMultiClassList: `MultiClassID` ("," `MultiClassID`)* + MultiClassID: `TokIdentifier` + MultiClassObject: `Def` | `Defm` | `Let` | `Foreach` diff --git a/docs/TableGenFundamentals.rst b/docs/TableGenFundamentals.rst index bfb2618998a9..4fe4bb986a2f 100644 --- a/docs/TableGenFundamentals.rst +++ b/docs/TableGenFundamentals.rst @@ -1,5 +1,3 @@ -.. _tablegen: - ===================== TableGen Fundamentals ===================== @@ -120,16 +118,16 @@ this (at the time of this writing): } ... -This definition corresponds to a 32-bit register-register add instruction in the -X86. The string after the '``def``' string indicates the name of the -record---"``ADD32rr``" in this case---and the comment at the end of the line -indicates the superclasses of the definition. The body of the record contains -all of the data that TableGen assembled for the record, indicating that the -instruction is part of the "X86" namespace, the pattern indicating how the the -instruction should be emitted into the assembly file, that it is a two-address -instruction, has a particular encoding, etc. The contents and semantics of the -information in the record is specific to the needs of the X86 backend, and is -only shown as an example. +This definition corresponds to the 32-bit register-register ``add`` instruction +of the x86 architecture. 
``def ADD32rr`` defines a record named +``ADD32rr``, and the comment at the end of the line indicates the superclasses +of the definition. The body of the record contains all of the data that +TableGen assembled for the record, indicating that the instruction is part of +the "X86" namespace, the pattern indicating how the instruction should be +emitted into the assembly file, that it is a two-address instruction, has a +particular encoding, etc. The contents and semantics of the information in the +record are specific to the needs of the X86 backend, and are only shown as an +example. As you can see, a lot of information is needed for every instruction supported by the code generator, and specifying it all manually would be unmaintainable, @@ -152,13 +150,12 @@ factor out the common features that instructions of its class share. A key feature of TableGen is that it allows the end-user to define the abstractions they prefer to use when describing their information. -Each def record has a special entry called "``NAME``." This is the name of the -def ("``ADD32rr``" above). In the general case def names can be formed from -various kinds of string processing expressions and ``NAME`` resolves to the +Each ``def`` record has a special entry called "NAME". This is the name of the +record ("``ADD32rr``" above). In the general case ``def`` names can be formed +from various kinds of string processing expressions and ``NAME`` resolves to the final value obtained after resolving all of those expressions. The user may -refer to ``NAME`` anywhere she desires to use the ultimate name of the def. -``NAME`` should not be defined anywhere else in user code to avoid conflict -problems. +refer to ``NAME`` anywhere she desires to use the ultimate name of the ``def``. +``NAME`` should not be defined anywhere else in user code to avoid conflicts. 
Running TableGen ---------------- @@ -794,6 +791,10 @@ Expressions used by code generator to describe instructions and isel patterns: TableGen backends ================= +Until we get a step-by-step HowTo for writing TableGen backends, you can at +least grab the boilerplate (build system, new files, etc.) from Clang's +r173931. + TODO: How they work, how to write one. This section should not contain details about any particular backend, except maybe ``-print-enums`` as an example. This should highlight the APIs in ``TableGen/Record.h``. diff --git a/docs/TestSuiteMakefileGuide.html b/docs/TestSuiteMakefileGuide.html deleted file mode 100644 index 1b24250380fb..000000000000 --- a/docs/TestSuiteMakefileGuide.html +++ /dev/null @@ -1,351 +0,0 @@ - - - - - LLVM test-suite Makefile Guide - - - - -

- LLVM test-suite Makefile Guide -

- -
    -
  1. Overview
  2. -
  3. Test suite structure
  4. -
  5. Running the test suite - -
  6. -
- -
-

Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner

-
- - -

Overview

- - -
- -

This document describes the features of the Makefile-based LLVM -test-suite. This way of interacting with the test-suite is deprecated in favor -of running the test-suite using LNT, but may continue to prove useful for some -users. See the Testing -Guide's test-suite -Quickstart section for more information.

- -
- - -

Test suite Structure

- - -
- -

The test-suite module contains a number of programs that can be compiled -with LLVM and executed. These programs are compiled using the native compiler -and various LLVM backends. The output from the program compiled with the -native compiler is assumed correct; the results from the other programs are -compared to the native program output and pass if they match.

- -

When executing tests, it is usually a good idea to start out with a subset of -the available tests or programs. This makes test run times smaller at first and -later on this is useful to investigate individual test failures. To run some -test only on a subset of programs, simply change directory to the programs you -want tested and run gmake there. Alternatively, you can run a different -test using the TEST variable to change what tests or run on the -selected programs (see below for more info).

- -

In addition for testing correctness, the test-suite directory also -performs timing tests of various LLVM optimizations. It also records -compilation times for the compilers and the JIT. This information can be -used to compare the effectiveness of LLVM's optimizations and code -generation.

- -

test-suite tests are divided into three types of tests: MultiSource, -SingleSource, and External.

- -
    -
  • test-suite/SingleSource -

    The SingleSource directory contains test programs that are only a single -source file in size. These are usually small benchmark programs or small -programs that calculate a particular value. Several such programs are grouped -together in each directory.

  • - -
  • test-suite/MultiSource -

    The MultiSource directory contains subdirectories which contain entire -programs with multiple source files. Large benchmarks and whole applications -go here.

  • - -
  • test-suite/External -

    The External directory contains Makefiles for building code that is external -to (i.e., not distributed with) LLVM. The most prominent members of this -directory are the SPEC 95 and SPEC 2000 benchmark suites. The External -directory does not contain these actual tests, but only the Makefiles that know -how to properly compile these programs from somewhere else. The presence and -location of these external programs is configured by the test-suite -configure script.

  • -
- -

Each tree is then subdivided into several categories, including applications, -benchmarks, regression tests, code that is strange grammatically, etc. These -organizations should be relatively self explanatory.

- -

Some tests are known to fail. Some are bugs that we have not fixed yet; -others are features that we haven't added yet (or may never add). In the -regression tests, the result for such tests will be XFAIL (eXpected FAILure). -In this way, you can tell the difference between an expected and unexpected -failure.

- -

The tests in the test suite have no such feature at this time. If the -test passes, only warnings and other miscellaneous output will be generated. If -a test fails, a large <program> FAILED message will be displayed. This -will help you separate benign warnings from actual test failures.

- -
- - -

Running the test suite

- - -
- -

First, all tests are executed within the LLVM object directory tree. They -are not executed inside of the LLVM source tree. This is because the -test suite creates temporary files during execution.

- -

To run the test suite, you need to use the following steps:

- -
    -
  1. cd into the llvm/projects directory in your source tree. -
  2. - -
  3. Check out the test-suite module with:

    - -
    -
    -% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
    -
    -
    -

    This will get the test suite into llvm/projects/test-suite.

    -
  4. -
  5. Configure and build llvm.

  6. -
  7. Configure and build llvm-gcc.

  8. -
  9. Install llvm-gcc somewhere.

  10. -
  11. Re-configure llvm from the top level of - each build tree (LLVM object directory tree) in which you want - to run the test suite, just as you do before building LLVM.

    -

    During the re-configuration, you must either: (1) - have llvm-gcc you just built in your path, or (2) - specify the directory where your just-built llvm-gcc is - installed using --with-llvmgccdir=$LLVM_GCC_DIR.

    -

    You must also tell the configure machinery that the test suite - is available so it can be configured for your build tree:

    -
    -
    -% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
    -
    -
    -

    [Remember that $LLVM_GCC_DIR is the directory where you - installed llvm-gcc, not its src or obj directory.]

    -
  12. - -
  13. You can now run the test suite from your build tree as follows:

    -
    -
    -% cd $LLVM_OBJ_ROOT/projects/test-suite
    -% make
    -
    -
    -
  14. -
-

Note that the second and third steps only need to be done once. After you -have the suite checked out and configured, you don't need to do it again (unless -the test code or configure script changes).

- - -

- Configuring External Tests -

- - -
-

In order to run the External tests in the test-suite - module, you must specify --with-externals. This - must be done during the re-configuration step (see above), - and the llvm re-configuration must recognize the - previously-built llvm-gcc. If any of these is missing or - neglected, the External tests won't work.

-
-
--with-externals
-
--with-externals=<directory>
-
- This tells LLVM where to find any external tests. They are expected to be - in specifically named subdirectories of <directory>. - If directory is left unspecified, - configure uses the default value - /home/vadve/shared/benchmarks/speccpu2000/benchspec. - Subdirectory names known to LLVM include: -
-
spec95
-
speccpu2000
-
speccpu2006
-
povray31
-
- Others are added from time to time, and can be determined from - configure. -
- - -

- Running different tests -

- -
-

In addition to the regular "whole program" tests, the test-suite -module also provides a mechanism for compiling the programs in different ways. -If the variable TEST is defined on the gmake command line, the test system will -include a Makefile named TEST.<value of TEST variable>.Makefile. -This Makefile can modify build rules to yield different results.

- -

For example, the LLVM nightly tester uses TEST.nightly.Makefile to -create the nightly test reports. To run the nightly tests, run gmake -TEST=nightly.

- -

There are several TEST Makefiles available in the tree. Some of them are -designed for internal LLVM research and will not work outside of the LLVM -research group. They may still be valuable, however, as a guide to writing your -own TEST Makefile for any optimization or analysis passes that you develop with -LLVM.

- -
- - -

- Generating test output -

- -
-

There are a number of ways to run the tests and generate output. The most - simple one is simply running gmake with no arguments. This will - compile and run all programs in the tree using a number of different methods - and compare results. Any failures are reported in the output, but are likely - drowned in the other output. Passes are not reported explicitly.

- -

Somewhat better is running gmake TEST=sometest test, which runs - the specified test and usually adds per-program summaries to the output - (depending on which sometest you use). For example, the nightly test - explicitly outputs TEST-PASS or TEST-FAIL for every test after each program. - Though these lines are still drowned in the output, it's easy to grep the - output logs in the Output directories.

- -

Even better are the report and report.format targets - (where format is one of html, csv, text or - graphs). The exact contents of the report are dependent on which - TEST you are running, but the text results are always shown at the - end of the run and the results are always stored in the - report.<type>.format file (when running with - TEST=<type>). - - The report also generates a file called - report.<type>.raw.out containing the output of the entire test - run. -

- - -

- Writing custom tests for the test suite -

- - -
- -

Assuming you can run the test suite, (e.g. "gmake TEST=nightly report" -should work), it is really easy to run optimizations or code generator -components against every program in the tree, collecting statistics or running -custom checks for correctness. At base, this is how the nightly tester works, -it's just one example of a general framework.

- -

Let's say that you have an LLVM optimization pass, and you want to see how -many times it triggers. First thing you should do is add an LLVM -statistic to your pass, which -will tally counts of things you care about.

- -

Following this, you can set up a test and a report that collects these and -formats them for easy viewing. This consists of two files, a -"test-suite/TEST.XXX.Makefile" fragment (where XXX is the name of your -test) and a "test-suite/TEST.XXX.report" file that indicates how to -format the output into a table. There are many example reports of various -levels of sophistication included with the test suite, and the framework is very -general.

- -

If you are interested in testing an optimization pass, check out the -"libcalls" test as an example. It can be run like this:

- -

-
-% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
-% make TEST=libcalls report
-
-
- -

This will do a bunch of stuff, then eventually print a table like this:

- -
-
-Name                                  | total | #exit |
-...
-FreeBench/analyzer/analyzer           | 51    | 6     | 
-FreeBench/fourinarow/fourinarow       | 1     | 1     | 
-FreeBench/neural/neural               | 19    | 9     | 
-FreeBench/pifft/pifft                 | 5     | 3     | 
-MallocBench/cfrac/cfrac               | 1     | *     | 
-MallocBench/espresso/espresso         | 52    | 12    | 
-MallocBench/gs/gs                     | 4     | *     | 
-Prolangs-C/TimberWolfMC/timberwolfmc  | 302   | *     | 
-Prolangs-C/agrep/agrep                | 33    | 12    | 
-Prolangs-C/allroots/allroots          | *     | *     | 
-Prolangs-C/assembler/assembler        | 47    | *     | 
-Prolangs-C/bison/mybison              | 74    | *     | 
-...
-
-
- -

This basically is grepping the -stats output and displaying it in a table. -You can also use the "TEST=libcalls report.html" target to get the table in HTML -form, similarly for report.csv and report.tex.

- -

The source for this is in test-suite/TEST.libcalls.*. The format is pretty -simple: the Makefile indicates how to run the test (in this case, -"opt -simplify-libcalls -stats"), and the report contains one line for -each column of the output. The first value is the header for the column and the -second is the regex to grep the output of the command for. There are lots of -example reports that can do fancy stuff.

- -
- -
- - - -
-
- Valid CSS - Valid HTML 4.01 - - John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date$ -
- - diff --git a/docs/TestSuiteMakefileGuide.rst b/docs/TestSuiteMakefileGuide.rst new file mode 100644 index 000000000000..e2852a073518 --- /dev/null +++ b/docs/TestSuiteMakefileGuide.rst @@ -0,0 +1,276 @@ +============================== +LLVM test-suite Makefile Guide +============================== + +.. contents:: + :local: + +Overview +======== + +This document describes the features of the Makefile-based LLVM +test-suite. This way of interacting with the test-suite is deprecated in +favor of running the test-suite using LNT, but may continue to prove +useful for some users. See the Testing Guide's :ref:`test-suite Quickstart +` section for more information. + +Test suite Structure +==================== + +The ``test-suite`` module contains a number of programs that can be +compiled with LLVM and executed. These programs are compiled using the +native compiler and various LLVM backends. The output from the program +compiled with the native compiler is assumed correct; the results from +the other programs are compared to the native program output and pass if +they match. + +When executing tests, it is usually a good idea to start out with a +subset of the available tests or programs. This makes test run times +smaller at first and later on this is useful to investigate individual +test failures. To run some test only on a subset of programs, simply +change directory to the programs you want tested and run ``gmake`` +there. Alternatively, you can run a different test using the ``TEST`` +variable to change what tests or run on the selected programs (see below +for more info). + +In addition for testing correctness, the ``test-suite`` directory also +performs timing tests of various LLVM optimizations. It also records +compilation times for the compilers and the JIT. This information can be +used to compare the effectiveness of LLVM's optimizations and code +generation. 
+ +``test-suite`` tests are divided into three types of tests: MultiSource, +SingleSource, and External. + +- ``test-suite/SingleSource`` + + The SingleSource directory contains test programs that are only a + single source file in size. These are usually small benchmark + programs or small programs that calculate a particular value. Several + such programs are grouped together in each directory. + +- ``test-suite/MultiSource`` + + The MultiSource directory contains subdirectories which contain + entire programs with multiple source files. Large benchmarks and + whole applications go here. + +- ``test-suite/External`` + + The External directory contains Makefiles for building code that is + external to (i.e., not distributed with) LLVM. The most prominent + members of this directory are the SPEC 95 and SPEC 2000 benchmark + suites. The ``External`` directory does not contain these actual + tests, but only the Makefiles that know how to properly compile these + programs from somewhere else. The presence and location of these + external programs is configured by the test-suite ``configure`` + script. + +Each tree is then subdivided into several categories, including +applications, benchmarks, regression tests, code that is strange +grammatically, etc. These organizations should be relatively self +explanatory. + +Some tests are known to fail. Some are bugs that we have not fixed yet; +others are features that we haven't added yet (or may never add). In the +regression tests, the result for such tests will be XFAIL (eXpected +FAILure). In this way, you can tell the difference between an expected +and unexpected failure. + +The tests in the test suite have no such feature at this time. If the +test passes, only warnings and other miscellaneous output will be +generated. If a test fails, a large FAILED message will be +displayed. This will help you separate benign warnings from actual test +failures. 
+ +Running the test suite +====================== + +First, all tests are executed within the LLVM object directory tree. +They *are not* executed inside of the LLVM source tree. This is because +the test suite creates temporary files during execution. + +To run the test suite, you need to use the following steps: + +#. ``cd`` into the ``llvm/projects`` directory in your source tree. +#. Check out the ``test-suite`` module with: + + .. code-block:: bash + + % svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite + + This will get the test suite into ``llvm/projects/test-suite``. + +#. Configure and build ``llvm``. + +#. Configure and build ``llvm-gcc``. + +#. Install ``llvm-gcc`` somewhere. + +#. *Re-configure* ``llvm`` from the top level of each build tree (LLVM + object directory tree) in which you want to run the test suite, just + as you do before building LLVM. + + During the *re-configuration*, you must either: (1) have ``llvm-gcc`` + you just built in your path, or (2) specify the directory where your + just-built ``llvm-gcc`` is installed using + ``--with-llvmgccdir=$LLVM_GCC_DIR``. + + You must also tell the configure machinery that the test suite is + available so it can be configured for your build tree: + + .. code-block:: bash + + % cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR] + + [Remember that ``$LLVM_GCC_DIR`` is the directory where you + *installed* llvm-gcc, not its src or obj directory.] + +#. You can now run the test suite from your build tree as follows: + + .. code-block:: bash + + % cd $LLVM_OBJ_ROOT/projects/test-suite + % make + +Note that the second and third steps only need to be done once. After +you have the suite checked out and configured, you don't need to do it +again (unless the test code or configure script changes). + +Configuring External Tests +-------------------------- + +In order to run the External tests in the ``test-suite`` module, you +must specify *--with-externals*. 
This must be done during the +*re-configuration* step (see above), and the ``llvm`` re-configuration +must recognize the previously-built ``llvm-gcc``. If any of these is +missing or neglected, the External tests won't work. + +* *--with-externals* + +* *--with-externals=* + +This tells LLVM where to find any external tests. They are expected to +be in specifically named subdirectories of <``directory``>. If +``directory`` is left unspecified, ``configure`` uses the default value +``/home/vadve/shared/benchmarks/speccpu2000/benchspec``. Subdirectory +names known to LLVM include: + +* spec95 + +* speccpu2000 + +* speccpu2006 + +* povray31 + +Others are added from time to time, and can be determined from +``configure``. + +Running different tests +----------------------- + +In addition to the regular "whole program" tests, the ``test-suite`` +module also provides a mechanism for compiling the programs in different +ways. If the variable TEST is defined on the ``gmake`` command line, the +test system will include a Makefile named +``TEST..Makefile``. This Makefile can modify +build rules to yield different results. + +For example, the LLVM nightly tester uses ``TEST.nightly.Makefile`` to +create the nightly test reports. To run the nightly tests, run +``gmake TEST=nightly``. + +There are several TEST Makefiles available in the tree. Some of them are +designed for internal LLVM research and will not work outside of the +LLVM research group. They may still be valuable, however, as a guide to +writing your own TEST Makefile for any optimization or analysis passes +that you develop with LLVM. + +Generating test output +---------------------- + +There are a number of ways to run the tests and generate output. The +most simple one is simply running ``gmake`` with no arguments. This will +compile and run all programs in the tree using a number of different +methods and compare results. Any failures are reported in the output, +but are likely drowned in the other output. 
Passes are not reported +explicitly. + +Somewhat better is running ``gmake TEST=sometest test``, which runs the +specified test and usually adds per-program summaries to the output +(depending on which sometest you use). For example, the ``nightly`` test +explicitly outputs TEST-PASS or TEST-FAIL for every test after each +program. Though these lines are still drowned in the output, it's easy +to grep the output logs in the Output directories. + +Even better are the ``report`` and ``report.format`` targets (where +``format`` is one of ``html``, ``csv``, ``text`` or ``graphs``). The +exact contents of the report are dependent on which ``TEST`` you are +running, but the text results are always shown at the end of the run and +the results are always stored in the ``report.<type>.format`` file (when +running with ``TEST=<type>``). The ``report`` also generates a file +called ``report.<type>.raw.out`` containing the output of the entire +test run. + +Writing custom tests for the test suite +--------------------------------------- + +Assuming you can run the test suite, (e.g. +"``gmake TEST=nightly report``" should work), it is really easy to run +optimizations or code generator components against every program in the +tree, collecting statistics or running custom checks for correctness. At +base, this is how the nightly tester works, it's just one example of a +general framework. + +Let's say that you have an LLVM optimization pass, and you want to see +how many times it triggers. First thing you should do is add an LLVM +`statistic `_ to your pass, which will +tally counts of things you care about. + +Following this, you can set up a test and a report that collects these +and formats them for easy viewing. This consists of two files, a +"``test-suite/TEST.XXX.Makefile``" fragment (where XXX is the name of +your test) and a "``test-suite/TEST.XXX.report``" file that indicates +how to format the output into a table.
There are many example reports of +various levels of sophistication included with the test suite, and the +framework is very general. + +If you are interested in testing an optimization pass, check out the +"libcalls" test as an example. It can be run like this: + +.. code-block:: bash + + % cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level + % make TEST=libcalls report + +This will do a bunch of stuff, then eventually print a table like this: + +:: + + Name | total | #exit | + ... + FreeBench/analyzer/analyzer | 51 | 6 | + FreeBench/fourinarow/fourinarow | 1 | 1 | + FreeBench/neural/neural | 19 | 9 | + FreeBench/pifft/pifft | 5 | 3 | + MallocBench/cfrac/cfrac | 1 | * | + MallocBench/espresso/espresso | 52 | 12 | + MallocBench/gs/gs | 4 | * | + Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * | + Prolangs-C/agrep/agrep | 33 | 12 | + Prolangs-C/allroots/allroots | * | * | + Prolangs-C/assembler/assembler | 47 | * | + Prolangs-C/bison/mybison | 74 | * | + ... + +This basically is grepping the -stats output and displaying it in a +table. You can also use the "TEST=libcalls report.html" target to get +the table in HTML form, similarly for report.csv and report.tex. + +The source for this is in ``test-suite/TEST.libcalls.*``. The format is +pretty simple: the Makefile indicates how to run the test (in this case, +"``opt -simplify-libcalls -stats``"), and the report contains one line +for each column of the output. The first value is the header for the +column and the second is the regex to grep the output of the command +for. There are lots of example reports that can do fancy stuff. diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html deleted file mode 100644 index c313083fa76a..000000000000 --- a/docs/TestingGuide.html +++ /dev/null @@ -1,916 +0,0 @@ - - - - - LLVM Testing Infrastructure Guide - - - - -

- LLVM Testing Infrastructure Guide -

- -
    -
  1. Overview
  2. -
  3. Requirements
  4. -
  5. LLVM testing infrastructure organization - -
  6. -
  7. Quick start - -
  8. -
  9. Regression test structure - -
  10. -
  11. test-suite Overview - -
  12. -
- -
-

Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner

-
- - -

Overview

- - -
- -

This document is the reference manual for the LLVM testing infrastructure. It -documents the structure of the LLVM testing infrastructure, the tools needed to -use it, and how to add and run tests.

- -
- - -

Requirements

- - -
- -

In order to use the LLVM testing infrastructure, you will need all of the -software required to build LLVM, as well -as Python 2.4 or later.

- -
- - -

LLVM testing infrastructure organization

- - -
- -

The LLVM testing infrastructure contains two major categories of tests: -regression tests and whole programs. The regression tests are contained inside -the LLVM repository itself under llvm/test and are expected to always -pass -- they should be run before every commit.

- -

The whole programs tests are referred to as the "LLVM test suite" (or -"test-suite") and are in the test-suite module in subversion. For -historical reasons, these tests are also referred to as the "nightly tests" in -places, which is less ambiguous than "test-suite" and remains in use although we -run them much more often than nightly.

- - -

Regression tests

- - -
- -

The regression tests are small pieces of code that test a specific feature of -LLVM or trigger a specific bug in LLVM. They are usually written in LLVM -assembly language, but can be written in other languages if the test targets a -particular language front end (and the appropriate --with-llvmgcc -options were used at configure time of the llvm module). These -tests are driven by the 'lit' testing tool, which is part of LLVM.

- -

These code fragments are not complete programs. The code generated -from them is never executed to determine correct behavior.

- -

These code fragment tests are located in the llvm/test -directory.

- -

Typically when a bug is found in LLVM, a regression test containing -just enough code to reproduce the problem should be written and placed -somewhere underneath this directory. In most cases, this will be a small -piece of LLVM assembly language code, often distilled from an actual -application or benchmark.

- -
- - -

test-suite

- - -
- -

The test suite contains whole programs, which are pieces of code which can be -compiled and linked into a stand-alone program that can be executed. These -programs are generally written in high level languages such as C or C++.

- -

These programs are compiled using a user specified compiler and set of flags, -and then executed to capture the program output and timing information. The -output of these programs is compared to a reference output to ensure that the -program is being compiled correctly.

- -

In addition to compiling and executing programs, whole program tests serve as -a way of benchmarking LLVM performance, both in terms of the efficiency of the -programs generated as well as the speed with which LLVM compiles, optimizes, and -generates code.

- -

The test-suite is located in the test-suite Subversion module.

- -
- - -

Debugging Information tests

- - -
- -

The test suite contains tests to check quality of debugging information. -The tests are written in C based languages or in LLVM assembly language.

- -

These tests are compiled and run under a debugger. The debugger output -is checked to validate the debugging information. See README.txt in the -test suite for more information. This test suite is located in the -debuginfo-tests Subversion module.

- -
- -
- - -

Quick start

- - -
- -

The tests are located in two separate Subversion modules. The regression - tests are in the main "llvm" module under the directory - llvm/test (so you get these tests for free with the main llvm - tree). Use "make check-all" to run the regression tests after building - LLVM.

- -

The more comprehensive test suite that includes whole programs in C and C++ - is in the test-suite - module. See test-suite Quickstart - for more information on running these tests.

- - -

Regression tests

-
- -

To run all of the LLVM regression tests, use the master Makefile in - the llvm/test directory:

- -
-
-% gmake -C llvm/test
-
-
- -

or

- -
-
-% gmake check
-
-
- -

If you have Clang checked out and built, -you can run the LLVM and Clang tests simultaneously using:

- -

or

- -
-
-% gmake check-all
-
-
- -

To run the tests with Valgrind (Memcheck by default), just append -VG=1 to the commands above, e.g.:

- -
-
-% gmake check VG=1
-
-
- -

To run individual tests or subsets of tests, you can use the 'llvm-lit' -script which is built as part of LLVM. For example, to run the -'Integer/BitPacked.ll' test by itself you can run:

- -
-
-% llvm-lit ~/llvm/test/Integer/BitPacked.ll 
-
-
- -

or to run all of the ARM CodeGen tests:

- -
-
-% llvm-lit ~/llvm/test/CodeGen/ARM
-
-
- -

For more information on using the 'lit' tool, see 'llvm-lit --help' or the -'lit' man page.

- -
- - -

Debugging Information tests

-
- -
- -

To run debugging information tests simply checkout the tests inside -clang/test directory.

- -
-
-%cd clang/test
-% svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests
-
-
- -

These tests are already set up to run as part of clang regression tests.

- -
- -
- -
- - -

Regression test structure

- -
-

The LLVM regression tests are driven by 'lit' and are located in - the llvm/test directory. - -

This directory contains a large array of small tests - that exercise various features of LLVM and to ensure that regressions do not - occur. The directory is broken into several sub-directories, each focused on - a particular area of LLVM. A few of the important ones are:

- -
    -
  • Analysis: checks Analysis passes.
  • -
  • Archive: checks the Archive library.
  • -
  • Assembler: checks Assembly reader/writer functionality.
  • -
  • Bitcode: checks Bitcode reader/writer functionality.
  • -
  • CodeGen: checks code generation and each target.
  • -
  • Features: checks various features of the LLVM language.
  • -
  • Linker: tests bitcode linking.
  • -
  • Transforms: tests each of the scalar, IPO, and utility - transforms to ensure they make the right transformations.
  • -
  • Verifier: tests the IR verifier.
  • -
- - -

Writing new regression tests

- -
-

The regression test structure is very simple, but does require some - information to be set. This information is gathered via configure and - is written to a file, lit.site.cfg - in llvm/test. The llvm/test Makefile does this work for - you.

- -

In order for the regression tests to work, each directory of tests must - have a lit.local.cfg file. Lit looks for this file to determine how - to run the tests. This file is just Python code and thus is very flexible, - but we've standardized it for the LLVM regression tests. If you're adding a - directory of tests, just copy lit.local.cfg from another directory to - get running. The standard lit.local.cfg simply specifies which files - to look in for tests. Any directory that contains only directories does not - need the lit.local.cfg file. Read the - Lit documentation for more - information.

- -

The llvm-runtests function looks at each file that is passed to - it and gathers any lines together that match "RUN:". These are the "RUN" lines - that specify how the test is to be run. So, each test script must contain - RUN lines if it is to do anything. If there are no RUN lines, the - llvm-runtests function will issue an error and the test will - fail.

- -

RUN lines are specified in the comments of the test program using the - keyword RUN followed by a colon, and lastly the command (pipeline) - to execute. Together, these lines form the "script" that - llvm-runtests executes to run the test case. The syntax of the - RUN lines is similar to a shell's syntax for pipelines including I/O - redirection and variable substitution. However, even though these lines - may look like a shell script, they are not. RUN lines are interpreted - directly by the Tcl exec command. They are never executed by a - shell. Consequently the syntax differs from normal shell script syntax in a - few ways. You can specify as many RUN lines as needed.

- -

lit performs substitution on each RUN line to replace LLVM tool - names with the full paths to the executable built for each tool (in - $(LLVM_OBJ_ROOT)/$(BuildMode)/bin). This ensures that lit does not - invoke any stray LLVM tools in the user's path during testing.

- -

Each RUN line is executed on its own, distinct from other lines unless - its last character is \. This continuation character causes the RUN - line to be concatenated with the next one. In this way you can build up long - pipelines of commands without making huge line lengths. The lines ending in - \ are concatenated until a RUN line that doesn't end in \ is - found. This concatenated set of RUN lines then constitutes one execution. - Tcl will substitute variables and arrange for the pipeline to be executed. If - any process in the pipeline fails, the entire line (and test case) fails too. -

- -

Below is an example of legal RUN lines in a .ll file:

- -
-
-; RUN: llvm-as < %s | llvm-dis > %t1
-; RUN: llvm-dis < %s.bc-13 > %t2
-; RUN: diff %t1 %t2
-
-
- -

As with a Unix shell, the RUN: lines permit pipelines and I/O redirection - to be used. However, the usage is slightly different than for Bash. To check - what's legal, see the documentation for the - Tcl exec - command and the - tutorial. - The major differences are:

-
    -
  • You can't do 2>&1. That will cause Tcl to write to a - file named &1. Usually this is done to get stderr to go through - a pipe. You can do that in tcl with |& so replace this idiom: - ... 2>&1 | grep with ... |& grep
  • -
  • You can only redirect to a file, not to another descriptor and not from - a here document.
  • -
  • tcl supports redirecting to open files with the @ syntax but you - shouldn't use that here.
  • -
- -

There are some quoting rules that you must pay attention to when writing - your RUN lines. In general nothing needs to be quoted. Tcl won't strip off any - quote characters so they will get passed to the invoked program. For - example:

- -
-
-... | grep 'find this string'
-
-
- -

This will fail because the ' characters are passed to grep. This would - instruct grep to look for 'find in the files this and - string'. To avoid this use curly braces to tell Tcl that it should - treat everything enclosed as one value. So our example would become:

- -
-
-... | grep {find this string}
-
-
- -

Additionally, the characters [ and ] are treated - specially by Tcl. They tell Tcl to interpret the content as a command to - execute. Since these characters are often used in regular expressions this can - have disastrous results and cause the entire test run in a directory to fail. - For example, a common idiom is to look for some basicblock number:

- -
-
-... | grep bb[2-8]
-
-
- -

This, however, will cause Tcl to fail because it's going to try to execute - a program named "2-8". Instead, what you want is this:

- -
-
-... | grep {bb\[2-8\]}
-
-
- -

Finally, if you need to pass the \ character down to a program, - then it must be doubled. This is another Tcl special character. So, suppose - you had: - -

-
-... | grep 'i32\*'
-
-
- -

This will fail to match what you want (a pointer to i32). First, the - ' do not get stripped off. Second, the \ gets stripped off - by Tcl so what grep sees is: 'i32*'. That's not likely to match - anything. To resolve this you must use \\ and the {}, like - this:

- -
-
-... | grep {i32\\*}
-
-
- -

If your system includes GNU grep, make sure -that GREP_OPTIONS is not set in your environment. Otherwise, -you may get invalid results (both false positives and false -negatives).

- -
- - -

The FileCheck utility

- - -
- -

A powerful feature of the RUN: lines is that it allows any arbitrary commands - to be executed as part of the test harness. While standard (portable) unix - tools like 'grep' work fine on run lines, as you see above, there are a lot - of caveats due to interaction with Tcl syntax, and we want to make sure the - run lines are portable to a wide range of systems. Another major problem is - that grep is not very good at checking to verify that the output of a tools - contains a series of different output in a specific order. The FileCheck - tool was designed to help with these problems.

- -

FileCheck (whose basic command line arguments are described in the FileCheck man page is - designed to read a file to check from standard input, and the set of things - to verify from a file specified as a command line argument. A simple example - of using FileCheck from a RUN line looks like this:

- -
-
-; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
-
-
- -

This syntax says to pipe the current file ("%s") into llvm-as, pipe that into -llc, then pipe the output of llc into FileCheck. This means that FileCheck will -be verifying its standard input (the llc output) against the filename argument -specified (the original .ll file specified by "%s"). To see how this works, -let's look at the rest of the .ll file (after the RUN line):

- -
-
-define void @sub1(i32* %p, i32 %v) {
-entry:
-; CHECK: sub1:
-; CHECK: subl
-        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
-        ret void
-}
-
-define void @inc4(i64* %p) {
-entry:
-; CHECK: inc4:
-; CHECK: incq
-        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
-        ret void
-}
-
-
- -

Here you can see some "CHECK:" lines specified in comments. Now you can see -how the file is piped into llvm-as, then llc, and the machine code output is -what we are verifying. FileCheck checks the machine code output to verify that -it matches what the "CHECK:" lines specify.

- -

The syntax of the CHECK: lines is very simple: they are fixed strings that -must occur in order. FileCheck defaults to ignoring horizontal whitespace -differences (e.g. a space is allowed to match a tab) but otherwise, the contents -of the CHECK: line is required to match some thing in the test file exactly.

- -

One nice thing about FileCheck (compared to grep) is that it allows merging -test cases together into logical groups. For example, because the test above -is checking for the "sub1:" and "inc4:" labels, it will not match unless there -is a "subl" in between those labels. If it existed somewhere else in the file, -that would not count: "grep subl" matches if subl exists anywhere in the -file.

- - -

- The FileCheck -check-prefix option -

- -
- -

The FileCheck -check-prefix option allows multiple test configurations to be -driven from one .ll file. This is useful in many circumstances, for example, -testing different architectural variants with llc. Here's a simple example:

- -
-
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
-; RUN:              | FileCheck %s -check-prefix=X32
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \
-; RUN:              | FileCheck %s -check-prefix=X64
-
-define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
-        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
-        ret <4 x i32> %tmp1
-; X32: pinsrd_1:
-; X32:    pinsrd $1, 4(%esp), %xmm0
-
-; X64: pinsrd_1:
-; X64:    pinsrd $1, %edi, %xmm0
-}
-
-
- -

In this case, we're testing that we get the expected code generation with -both 32-bit and 64-bit code generation.

- -
- - -

- The "CHECK-NEXT:" directive -

- -
- -

Sometimes you want to match lines and would like to verify that matches -happen on exactly consecutive lines with no other lines in between them. In -this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If -you specified a custom check prefix, just use "<PREFIX>-NEXT:". For -example, something like this works as you'd expect:

- -
-
-define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) {
-	%tmp3 = load <2 x double>* %A, align 16
-	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
-	%tmp9 = shufflevector <2 x double> %tmp3,
-                              <2 x double> %tmp7,
-                              <2 x i32> < i32 0, i32 2 >
-	store <2 x double> %tmp9, <2 x double>* %r, align 16
-	ret void
-        
-; CHECK: t2:
-; CHECK: 	movl	8(%esp), %eax
-; CHECK-NEXT: 	movapd	(%eax), %xmm0
-; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
-; CHECK-NEXT: 	movl	4(%esp), %eax
-; CHECK-NEXT: 	movapd	%xmm0, (%eax)
-; CHECK-NEXT: 	ret
-}
-
-
- -

CHECK-NEXT: directives reject the input unless there is exactly one newline -between it an the previous directive. A CHECK-NEXT cannot be the first -directive in a file.

- -
- - -

- The "CHECK-NOT:" directive -

- -
- -

The CHECK-NOT: directive is used to verify that a string doesn't occur -between two matches (or the first match and the beginning of the file). For -example, to verify that a load is removed by a transformation, a test like this -can be used:

- -
-
-define i8 @coerce_offset0(i32 %V, i32* %P) {
-  store i32 %V, i32* %P
-   
-  %P2 = bitcast i32* %P to i8*
-  %P3 = getelementptr i8* %P2, i32 2
-
-  %A = load i8* %P3
-  ret i8 %A
-; CHECK: @coerce_offset0
-; CHECK-NOT: load
-; CHECK: ret i8
-}
-
-
- -
- - -

- FileCheck Pattern Matching Syntax -

- -
- - - -

The CHECK: and CHECK-NOT: directives both take a pattern to match. For most -uses of FileCheck, fixed string matching is perfectly sufficient. For some -things, a more flexible form of matching is desired. To support this, FileCheck -allows you to specify regular expressions in matching strings, surrounded by -double braces: {{yourregex}}. Because we want to use fixed string -matching for a majority of what we do, FileCheck has been designed to support -mixing and matching fixed string matching with regular expressions. This allows -you to write things like this:

- -
-
-; CHECK: movhpd	{{[0-9]+}}(%esp), {{%xmm[0-7]}}
-
-
- -

In this case, any offset from the ESP register will be allowed, and any xmm -register will be allowed.

- -

Because regular expressions are enclosed with double braces, they are -visually distinct, and you don't need to use escape characters within the double -braces like you would in C. In the rare case that you want to match double -braces explicitly from the input, you can use something ugly like -{{[{][{]}} as your pattern.

- - - -
- - -

- FileCheck Variables -

- -
- - - - -

It is often useful to match a pattern and then verify that it occurs again -later in the file. For codegen tests, this can be useful to allow any register, -but verify that that register is used consistently later. To do this, FileCheck -allows named variables to be defined and substituted into patterns. Here is a -simple example:

- -
-
-; CHECK: test5:
-; CHECK:    notw	[[REGISTER:%[a-z]+]]
-; CHECK:    andw	{{.*}}[[REGISTER]]
-
-
- -

The first check line matches a regex (%[a-z]+) and captures it into -the variables "REGISTER". The second line verifies that whatever is in REGISTER -occurs later in the file after an "andw". FileCheck variable references are -always contained in [[ ]] pairs, are named, and their names can be -formed with the regex "[a-zA-Z][a-zA-Z0-9]*". If a colon follows the -name, then it is a definition of the variable, if not, it is a use.

- -

FileCheck variables can be defined multiple times, and uses always get the -latest value. Note that variables are all read at the start of a "CHECK" line -and are all defined at the end. This means that if you have something like -"CHECK: [[XYZ:.*]]x[[XYZ]]" that the check line will read the previous -value of the XYZ variable and define a new one after the match is performed. If -you need to do something like this you can probably take advantage of the fact -that FileCheck is not actually line-oriented when it matches, this allows you to -define two separate CHECK lines that match on the same line. -

- - - -
- -
- - -

Variables and substitutions

- -
-

With a RUN line there are a number of substitutions that are permitted. In - general, any Tcl variable that is available in the substitute - function (in test/lib/llvm.exp) can be substituted into a RUN line. - To make a substitution just write the variable's name preceded by a $. - Additionally, for compatibility reasons with previous versions of the test - library, certain names can be accessed with an alternate syntax: a % prefix. - These alternates are deprecated and may go away in a future version. -

-

Here are the available variable names. The alternate syntax is listed in - parentheses.

- -
-
$test (%s)
-
The full path to the test case's source. This is suitable for passing - on the command line as the input to an llvm tool.
- -
$srcdir
-
The source directory from where the "make check" was run.
- -
objdir
-
The object directory that corresponds to the $srcdir.
- -
subdir
-
A partial path from the test directory that contains the - sub-directory that contains the test source being executed.
- -
srcroot
-
The root directory of the LLVM src tree.
- -
objroot
-
The root directory of the LLVM object tree. This could be the same - as the srcroot.
- -
path
-
The path to the directory that contains the test case source. This is - for locating any supporting files that are not generated by the test, but - used by the test.
- -
tmp
-
The path to a temporary file name that could be used for this test case. - The file name won't conflict with other test cases. You can append to it if - you need multiple temporaries. This is useful as the destination of some - redirected output.
- -
target_triplet (%target_triplet)
-
The target triplet that corresponds to the current host machine (the one - running the test cases). This should probably be called "host".
- -
link (%link)
-
This full link command used to link LLVM executables. This has all the - configured -I, -L and -l options.
- -
shlibext (%shlibext)
-
The suffix for the host platforms share library (dll) files. This - includes the period as the first character.
-
-

To add more variables, two things need to be changed. First, add a line in - the test/Makefile that creates the site.exp file. This will - "set" the variable as a global in the site.exp file. Second, in the - test/lib/llvm.exp file, in the substitute proc, add the variable name - to the list of "global" declarations at the beginning of the proc. That's it, - the variable can then be used in test scripts.

-
- - -

Other Features

- -
-

To make RUN line writing easier, there are several shell scripts located - in the llvm/test/Scripts directory. This directory is in the PATH - when running tests, so you can just call these scripts using their name. For - example:

-
-
ignore
-
This script runs its arguments and then always returns 0. This is useful - in cases where the test needs to cause a tool to generate an error (e.g. to - check the error output). However, any program in a pipeline that returns a - non-zero result will cause the test to fail. This script overcomes that - issue and nicely documents that the test case is purposefully ignoring the - result code of the tool
- -
not
-
This script runs its arguments and then inverts the result code from - it. Zero result codes become 1. Non-zero result codes become 0. This is - useful to invert the result of a grep. For example "not grep X" means - succeed only if you don't find X in the input.
-
- -

Sometimes it is necessary to mark a test case as "expected fail" or XFAIL. - You can easily mark a test as XFAIL just by including XFAIL: on a - line near the top of the file. This signals that the test case should succeed - if the test fails. Such test cases are counted separately by the testing - tool. To specify an expected fail, use the XFAIL keyword in the comments of - the test program followed by a colon and one or more failure patterns. Each - failure pattern can be either '*' (to specify fail everywhere), or a part of a - target triple (indicating the test should fail on that platform), or the name - of a configurable feature (for example, "loadable_module"). If there is a - match, the test is expected to fail. If not, the test is expected to - succeed. To XFAIL everywhere just specify XFAIL: *. Here is an - example of an XFAIL line:

- -
-
-; XFAIL: darwin,sun
-
-
- -

To make the output more useful, the llvm_runtest function wil - scan the lines of the test case for ones that contain a pattern that matches - PR[0-9]+. This is the syntax for specifying a PR (Problem Report) number that - is related to the test case. The number after "PR" specifies the LLVM bugzilla - number. When a PR number is specified, it will be used in the pass/fail - reporting. This is useful to quickly get some context when a test fails.

- -

Finally, any line that contains "END." will cause the special - interpretation of lines to terminate. This is generally done right after the - last RUN: line. This has two side effects: (a) it prevents special - interpretation of lines that are part of the test program, not the - instructions to the test case, and (b) it speeds things up for really big test - cases by avoiding interpretation of the remainder of the file.

- -
- -
- - -

test-suite Overview

- - -
- -

The test-suite module contains a number of programs that can be -compiled and executed. The test-suite includes reference outputs for -all of the programs, so that the output of the executed program can be checked -for correctness.

- -

test-suite tests are divided into three types of tests: MultiSource, -SingleSource, and External.

- -
    -
  • test-suite/SingleSource -

    The SingleSource directory contains test programs that are only a single -source file in size. These are usually small benchmark programs or small -programs that calculate a particular value. Several such programs are grouped -together in each directory.

  • - -
  • test-suite/MultiSource -

    The MultiSource directory contains subdirectories which contain entire -programs with multiple source files. Large benchmarks and whole applications -go here.

  • - -
  • test-suite/External -

    The External directory contains Makefiles for building code that is external -to (i.e., not distributed with) LLVM. The most prominent members of this -directory are the SPEC 95 and SPEC 2000 benchmark suites. The External -directory does not contain these actual tests, but only the Makefiles that know -how to properly compile these programs from somewhere else. When -using LNT, use the --test-externals option to include these -tests in the results.

  • -
-
- - -

test-suite Quickstart

- - -
-

The modern way of running the test-suite is focused on testing and -benchmarking complete compilers using -the LNT testing infrastructure.

- -

For more information on using LNT to execute the test-suite, please -see the LNT Quickstart -documentation.

-
- - -

test-suite Makefiles

- - -
-

Historically, the test-suite was executed using a complicated setup -of Makefiles. The LNT based approach above is recommended for most users, but -there are some testing scenarios which are not supported by the LNT approach. In -addition, LNT currently uses the Makefile setup under the covers and so -developers who are interested in how LNT works under the hood may want to -understand the Makefile based setup.

- -

For more information on the test-suite Makefile setup, please see -the Test Suite Makefile Guide.

-
- - - -
-
- Valid CSS - Valid HTML 4.01 - - John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-11-07 18:00:18 +0100 (Wed, 07 Nov 2012) $ -
- - diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst new file mode 100644 index 000000000000..79cedee764f7 --- /dev/null +++ b/docs/TestingGuide.rst @@ -0,0 +1,455 @@ +================================= +LLVM Testing Infrastructure Guide +================================= + +.. contents:: + :local: + +.. toctree:: + :hidden: + + TestSuiteMakefileGuide + +Overview +======== + +This document is the reference manual for the LLVM testing +infrastructure. It documents the structure of the LLVM testing +infrastructure, the tools needed to use it, and how to add and run +tests. + +Requirements +============ + +In order to use the LLVM testing infrastructure, you will need all of +the software required to build LLVM, as well as +`Python `_ 2.4 or later. + +LLVM testing infrastructure organization +======================================== + +The LLVM testing infrastructure contains two major categories of tests: +regression tests and whole programs. The regression tests are contained +inside the LLVM repository itself under ``llvm/test`` and are expected +to always pass -- they should be run before every commit. + +The whole programs tests are referred to as the "LLVM test suite" (or +"test-suite") and are in the ``test-suite`` module in subversion. For +historical reasons, these tests are also referred to as the "nightly +tests" in places, which is less ambiguous than "test-suite" and remains +in use although we run them much more often than nightly. + +Regression tests +---------------- + +The regression tests are small pieces of code that test a specific +feature of LLVM or trigger a specific bug in LLVM. The language they are +written in depends on the part of LLVM being tested. These tests are driven by +the :doc:`Lit ` testing tool (which is part of LLVM), and +are located in the ``llvm/test`` directory. 
+ +Typically when a bug is found in LLVM, a regression test containing just +enough code to reproduce the problem should be written and placed +somewhere underneath this directory. For example, it can be a small +piece of LLVM IR distilled from an actual application or benchmark. + +``test-suite`` +-------------- + +The test suite contains whole programs, which are pieces of code which +can be compiled and linked into a stand-alone program that can be +executed. These programs are generally written in high level languages +such as C or C++. + +These programs are compiled using a user specified compiler and set of +flags, and then executed to capture the program output and timing +information. The output of these programs is compared to a reference +output to ensure that the program is being compiled correctly. + +In addition to compiling and executing programs, whole program tests +serve as a way of benchmarking LLVM performance, both in terms of the +efficiency of the programs generated as well as the speed with which +LLVM compiles, optimizes, and generates code. + +The test-suite is located in the ``test-suite`` Subversion module. + +Debugging Information tests +--------------------------- + +The test suite contains tests to check the quality of debugging information. +The tests are written in C based languages or in LLVM assembly language. + +These tests are compiled and run under a debugger. The debugger output +is checked to validate the debugging information. See README.txt in the +test suite for more information. This test suite is located in the +``debuginfo-tests`` Subversion module. + +Quick start +=========== + +The tests are located in two separate Subversion modules. The +regression tests are in the main "llvm" module under the directory +``llvm/test`` (so you get these tests for free with the main LLVM tree). +Use ``make check-all`` to run the regression tests after building LLVM. 
+ +The more comprehensive test suite that includes whole programs in C and C++ +is in the ``test-suite`` module. See :ref:`test-suite Quickstart +<test-suite-quickstart>` for more information on running these tests. + +Regression tests +---------------- + +To run all of the LLVM regression tests, use the master Makefile in the +``llvm/test`` directory. LLVM Makefiles require GNU Make (read the :doc:`LLVM +Makefile Guide <MakefileGuide>` for more details): + +.. code-block:: bash + + % make -C llvm/test + +or: + +.. code-block:: bash + + % make check + +If you have `Clang <http://clang.llvm.org/>`_ checked out and built, you +can run the LLVM and Clang tests simultaneously using: + +.. code-block:: bash + + % make check-all + +To run the tests with Valgrind (Memcheck by default), just append +``VG=1`` to the commands above, e.g.: + +.. code-block:: bash + + % make check VG=1 + +To run individual tests or subsets of tests, you can use the ``llvm-lit`` +script which is built as part of LLVM. For example, to run the +``Integer/BitPacked.ll`` test by itself you can run: + +.. code-block:: bash + + % llvm-lit ~/llvm/test/Integer/BitPacked.ll + +or to run all of the ARM CodeGen tests: + +.. code-block:: bash + + % llvm-lit ~/llvm/test/CodeGen/ARM + +For more information on using the :program:`lit` tool, see ``llvm-lit --help`` +or the :doc:`lit man page <CommandGuide/lit>`. + +Debugging Information tests +--------------------------- + +To run debugging information tests simply check out the tests inside +the clang/test directory. + +.. code-block:: bash + + % cd clang/test + % svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests + +These tests are already set up to run as part of clang regression tests. + +Regression test structure +========================= + +The LLVM regression tests are driven by :program:`lit` and are located in the +``llvm/test`` directory. + +This directory contains a large array of small tests that exercise +various features of LLVM and ensure that regressions do not occur. 
+The directory is broken into several sub-directories, each focused on a +particular area of LLVM. + +Writing new regression tests +---------------------------- + +The regression test structure is very simple, but does require some +information to be set. This information is gathered via ``configure`` +and is written to a file, ``test/lit.site.cfg`` in the build directory. +The ``llvm/test`` Makefile does this work for you. + +In order for the regression tests to work, each directory of tests must +have a ``lit.local.cfg`` file. :program:`lit` looks for this file to determine +how to run the tests. This file is just Python code and thus is very +flexible, but we've standardized it for the LLVM regression tests. If +you're adding a directory of tests, just copy ``lit.local.cfg`` from +another directory to get running. The standard ``lit.local.cfg`` simply +specifies which files to look in for tests. Any directory that contains +only directories does not need the ``lit.local.cfg`` file. Read the :doc:`Lit +documentation ` for more information. + +Each test file must contain lines starting with "RUN:" that tell :program:`lit` +how to run it. If there are no RUN lines, :program:`lit` will issue an error +while running a test. + +RUN lines are specified in the comments of the test program using the +keyword ``RUN`` followed by a colon, and lastly the command (pipeline) +to execute. Together, these lines form the "script" that :program:`lit` +executes to run the test case. The syntax of the RUN lines is similar to a +shell's syntax for pipelines including I/O redirection and variable +substitution. However, even though these lines may *look* like a shell +script, they are not. RUN lines are interpreted by :program:`lit`. +Consequently, the syntax differs from shell in a few ways. You can specify +as many RUN lines as needed. 
+ +:program:`lit` performs substitution on each RUN line to replace LLVM tool names +with the full paths to the executable built for each tool (in +``$(LLVM_OBJ_ROOT)/$(BuildMode)/bin)``. This ensures that :program:`lit` does +not invoke any stray LLVM tools in the user's path during testing. + +Each RUN line is executed on its own, distinct from other lines unless +its last character is ``\``. This continuation character causes the RUN +line to be concatenated with the next one. In this way you can build up +long pipelines of commands without making huge line lengths. The lines +ending in ``\`` are concatenated until a RUN line that doesn't end in +``\`` is found. This concatenated set of RUN lines then constitutes one +execution. :program:`lit` will substitute variables and arrange for the pipeline +to be executed. If any process in the pipeline fails, the entire line (and +test case) fails too. + +Below is an example of legal RUN lines in a ``.ll`` file: + +.. code-block:: llvm + + ; RUN: llvm-as < %s | llvm-dis > %t1 + ; RUN: llvm-dis < %s.bc-13 > %t2 + ; RUN: diff %t1 %t2 + +As with a Unix shell, the RUN lines permit pipelines and I/O +redirection to be used. + +There are some quoting rules that you must pay attention to when writing +your RUN lines. In general nothing needs to be quoted. :program:`lit` won't +strip off any quote characters so they will get passed to the invoked program. +To avoid this use curly braces to tell :program:`lit` that it should treat +everything enclosed as one value. + +In general, you should strive to keep your RUN lines as simple as possible, +using them only to run tools that generate textual output you can then examine. +The recommended way to examine output to figure out if the test passes is using +the :doc:`FileCheck tool `. 
*[The usage of grep in RUN +lines is deprecated - please do not send or commit patches that use it.]* + +Fragile tests +------------- + +It is easy to write a fragile test that would fail spuriously if the tool being +tested outputs a full path to the input file. For example, :program:`opt` by +default outputs a ``ModuleID``: + +.. code-block:: console + + $ cat example.ll + define i32 @main() nounwind { + ret i32 0 + } + + $ opt -S /path/to/example.ll + ; ModuleID = '/path/to/example.ll' + + define i32 @main() nounwind { + ret i32 0 + } + +``ModuleID`` can unexpectedly match against ``CHECK`` lines. For example: + +.. code-block:: llvm + + ; RUN: opt -S %s | FileCheck %s + + define i32 @main() nounwind { + ; CHECK-NOT: load + ret i32 0 + } + +This test will fail if placed into a ``download`` directory, because the path +printed in the ``ModuleID`` line then contains the string ``load``. + +To make your tests robust, always use ``opt ... < %s`` in the RUN line. +:program:`opt` does not output a ``ModuleID`` when input comes from stdin. + +Variables and substitutions +--------------------------- + +With a RUN line there are a number of substitutions that are permitted. +To make a substitution just write the variable's name preceded by a ``$``. +Additionally, for compatibility reasons with previous versions of the +test library, certain names can be accessed with an alternate syntax: a +% prefix. These alternates are deprecated and may go away in a future +version. + +Here are the available variable names. The alternate syntax is listed in +parentheses. + +``$test`` (``%s``) + The full path to the test case's source. This is suitable for passing on + the command line as the input to an LLVM tool. + +``%(line)``, ``%(line+<number>)``, ``%(line-<number>)`` + The number of the line where this variable is used, with an optional + integer offset. This can be used in tests with multiple RUN lines, + which reference test file's line numbers. + +``$srcdir`` + The source directory from where the ``make check`` was run. 
+ +``objdir`` + The object directory that corresponds to the ``$srcdir``. + +``subdir`` + A partial path from the ``test`` directory that contains the + sub-directory that contains the test source being executed. + +``srcroot`` + The root directory of the LLVM src tree. + +``objroot`` + The root directory of the LLVM object tree. This could be the same as + the srcroot. + +``path`` + The path to the directory that contains the test case source. This is + for locating any supporting files that are not generated by the test, + but used by the test. + +``tmp`` + The path to a temporary file name that could be used for this test case. + The file name won't conflict with other test cases. You can append to it + if you need multiple temporaries. This is useful as the destination of + some redirected output. + +``target_triplet`` (``%target_triplet``) + The target triplet that corresponds to the current host machine (the one + running the test cases). This should probably be called "host". + +``link`` (``%link``) + The full link command used to link LLVM executables. This has all the + configured ``-I``, ``-L`` and ``-l`` options. + +``shlibext`` (``%shlibext``) + The suffix for the host platform's shared library (DLL) files. This + includes the period as the first character. + +To add more variables, look at ``test/lit.cfg``. + +Other Features +-------------- + +To make RUN line writing easier, there are several helper scripts and programs +in the ``llvm/test/Scripts`` directory. This directory is in the PATH +when running tests, so you can just call these scripts using their name. +For example: + +``ignore`` + This script runs its arguments and then always returns 0. This is useful + in cases where the test needs to cause a tool to generate an error (e.g. + to check the error output). However, any program in a pipeline that + returns a non-zero result will cause the test to fail. 
This script + overcomes that issue and nicely documents that the test case is + purposefully ignoring the result code of the tool +``not`` + This script runs its arguments and then inverts the result code from it. + Zero result codes become 1. Non-zero result codes become 0. + +Sometimes it is necessary to mark a test case as "expected fail" or +XFAIL. You can easily mark a test as XFAIL just by including ``XFAIL:`` +on a line near the top of the file. This signals that the test case +should succeed if the test fails. Such test cases are counted separately +by the testing tool. To specify an expected fail, use the XFAIL keyword +in the comments of the test program followed by a colon and one or more +failure patterns. Each failure pattern can be either ``*`` (to specify +fail everywhere), or a part of a target triple (indicating the test +should fail on that platform), or the name of a configurable feature +(for example, ``loadable_module``). If there is a match, the test is +expected to fail. If not, the test is expected to succeed. To XFAIL +everywhere just specify ``XFAIL: *``. Here is an example of an ``XFAIL`` +line: + +.. code-block:: llvm + + ; XFAIL: darwin,sun + +To make the output more useful, :program:`lit` will scan +the lines of the test case for ones that contain a pattern that matches +``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number +that is related to the test case. The number after "PR" specifies the +LLVM bugzilla number. When a PR number is specified, it will be used in +the pass/fail reporting. This is useful to quickly get some context when +a test fails. + +Finally, any line that contains "END." will cause the special +interpretation of lines to terminate. This is generally done right after +the last RUN: line. 
This has two side effects: + +(a) it prevents special interpretation of lines that are part of the test + program, not the instructions to the test case, and + +(b) it speeds things up for really big test cases by avoiding + interpretation of the remainder of the file. + +``test-suite`` Overview +======================= + +The ``test-suite`` module contains a number of programs that can be +compiled and executed. The ``test-suite`` includes reference outputs for +all of the programs, so that the output of the executed program can be +checked for correctness. + +``test-suite`` tests are divided into three types of tests: MultiSource, +SingleSource, and External. + +- ``test-suite/SingleSource`` + + The SingleSource directory contains test programs that are only a + single source file in size. These are usually small benchmark + programs or small programs that calculate a particular value. Several + such programs are grouped together in each directory. + +- ``test-suite/MultiSource`` + + The MultiSource directory contains subdirectories which contain + entire programs with multiple source files. Large benchmarks and + whole applications go here. + +- ``test-suite/External`` + + The External directory contains Makefiles for building code that is + external to (i.e., not distributed with) LLVM. The most prominent + members of this directory are the SPEC 95 and SPEC 2000 benchmark + suites. The ``External`` directory does not contain these actual + tests, but only the Makefiles that know how to properly compile these + programs from somewhere else. When using ``LNT``, use the + ``--test-externals`` option to include these tests in the results. + +.. _test-suite-quickstart: + +``test-suite`` Quickstart +------------------------- + +The modern way of running the ``test-suite`` is focused on testing and +benchmarking complete compilers using the +`LNT `_ testing infrastructure. 
+ +For more information on using LNT to execute the ``test-suite``, please +see the `LNT Quickstart `_ +documentation. + +``test-suite`` Makefiles +------------------------ + +Historically, the ``test-suite`` was executed using a complicated setup +of Makefiles. The LNT based approach above is recommended for most +users, but there are some testing scenarios which are not supported by +the LNT approach. In addition, LNT currently uses the Makefile setup +under the covers and so developers who are interested in how LNT works +under the hood may want to understand the Makefile based setup. + +For more information on the ``test-suite`` Makefile setup, please see +the :doc:`Test Suite Makefile Guide <TestSuiteMakefileGuide>`. diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst new file mode 100644 index 000000000000..e2d3667bc116 --- /dev/null +++ b/docs/Vectorizers.rst @@ -0,0 +1,338 @@ +========================== +Auto-Vectorization in LLVM +========================== + +.. contents:: + :local: + +LLVM has two vectorizers: The :ref:`Loop Vectorizer <loop-vectorizer>`, +which operates on Loops, and the :ref:`Basic Block Vectorizer +<bb-vectorizer>`, which optimizes straight-line code. These vectorizers +focus on different optimization opportunities and use different techniques. +The BB vectorizer merges multiple scalars that are found in the code into +vectors while the Loop Vectorizer widens instructions in the original loop +to operate on multiple consecutive loop iterations. + +.. _loop-vectorizer: + +The Loop Vectorizer +=================== + +Usage +----- + +LLVM's Loop Vectorizer is now available and will be useful for many people. +It is not enabled by default, but can be enabled through clang using the +command line flag: + +.. code-block:: console + + $ clang -fvectorize -O3 file.c + +If the ``-fvectorize`` flag is used then the loop vectorizer will be enabled +when running with ``-O3`` or ``-O2``.
When ``-Os`` is used, the loop vectorizer +will only vectorize loops that do not require a major increase in code size. + +We plan to enable the Loop Vectorizer by default as part of the LLVM 3.3 release. + +Command line flags +^^^^^^^^^^^^^^^^^^ + +The loop vectorizer uses a cost model to decide on the optimal vectorization factor +and unroll factor. However, users of the vectorizer can force the vectorizer to use +specific values. Both 'clang' and 'opt' support the flags below. + +Users can control the vectorization SIMD width using the command line flag "-force-vector-width". + +.. code-block:: console + + $ clang -mllvm -force-vector-width=8 ... + $ opt -loop-vectorize -force-vector-width=8 ... + +Users can control the unroll factor using the command line flag "-force-vector-unroll". + +.. code-block:: console + + $ clang -mllvm -force-vector-unroll=2 ... + $ opt -loop-vectorize -force-vector-unroll=2 ... + +Features +-------- + +The LLVM Loop Vectorizer has a number of features that allow it to vectorize +complex loops. + +Loops with unknown trip count +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Loop Vectorizer supports loops with an unknown trip count. +In the loop below, the iteration ``start`` and ``end`` points are unknown, +and the Loop Vectorizer has a mechanism to vectorize loops that do not start +at zero. In this example, the trip count may not be a multiple of the vector width, and +the vectorizer has to execute the last few iterations as scalar code. Keeping +a scalar copy of the loop increases the code size. + +.. code-block:: c++ + + void bar(float *A, float* B, float K, int start, int end) { + for (int i = start; i < end; ++i) + A[i] *= B[i] + K; + } + +Runtime Checks of Pointers +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the example below, if the pointers A and B point to consecutive addresses, +then it is illegal to vectorize the code because some elements of A will be +written before they are read from array B.
+ +Some programmers use the 'restrict' keyword to notify the compiler that the +pointers are disjoint, but in our example, the Loop Vectorizer has no way of +knowing that the pointers A and B are unique. The Loop Vectorizer handles this +loop by placing code that checks, at runtime, if the arrays A and B point to +disjoint memory locations. If arrays A and B overlap, then the scalar version +of the loop is executed. + +.. code-block:: c++ + + void bar(float *A, float* B, float K, int n) { + for (int i = 0; i < n; ++i) + A[i] *= B[i] + K; + } + + +Reductions +^^^^^^^^^^ + +In this example the ``sum`` variable is used by consecutive iterations of +the loop. Normally, this would prevent vectorization, but the vectorizer can +detect that 'sum' is a reduction variable. The variable 'sum' becomes a vector +of integers, and at the end of the loop the elements of the vector are added +together to create the correct result. We support a number of different +reduction operations, such as addition, multiplication, XOR, AND and OR. + +.. code-block:: c++ + + int foo(int *A, int *B, int n) { + unsigned sum = 0; + for (int i = 0; i < n; ++i) + sum += A[i] + 5; + return sum; + } + +We support floating point reduction operations when ``-ffast-math`` is used. + +Inductions +^^^^^^^^^^ + +In this example the value of the induction variable ``i`` is saved into an +array. The Loop Vectorizer knows to vectorize induction variables. + +.. code-block:: c++ + + void bar(float *A, float* B, float K, int n) { + for (int i = 0; i < n; ++i) + A[i] = i; + } + +If Conversion +^^^^^^^^^^^^^ + +The Loop Vectorizer is able to "flatten" the IF statement in the code and +generate a single stream of instructions. The Loop Vectorizer supports any +control flow in the innermost loop. The innermost loop may contain complex +nesting of IFs, ELSEs and even GOTOs. + +..
code-block:: c++ + + int foo(int *A, int *B, int n) { + unsigned sum = 0; + for (int i = 0; i < n; ++i) + if (A[i] > B[i]) + sum += A[i] + 5; + return sum; + } + +Pointer Induction Variables +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This example uses the "accumulate" function of the standard c++ library. This +loop uses C++ iterators, which are pointers, and not integer indices. +The Loop Vectorizer detects pointer induction variables and can vectorize +this loop. This feature is important because many C++ programs use iterators. + +.. code-block:: c++ + + int baz(int *A, int n) { + return std::accumulate(A, A + n, 0); + } + +Reverse Iterators +^^^^^^^^^^^^^^^^^ + +The Loop Vectorizer can vectorize loops that count backwards. + +.. code-block:: c++ + + int foo(int *A, int *B, int n) { + for (int i = n; i > 0; --i) + A[i] +=1; + } + +Scatter / Gather +^^^^^^^^^^^^^^^^ + +The Loop Vectorizer can vectorize code that becomes a sequence of scalar instructions +that scatter/gathers memory. + +.. code-block:: c++ + + int foo(int *A, int *B, int n, int k) { + for (int i = 0; i < n; ++i) + A[i*7] += B[i*k]; + } + +Vectorization of Mixed Types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Loop Vectorizer can vectorize programs with mixed types. The Vectorizer +cost model can estimate the cost of the type conversion and decide if +vectorization is profitable. + +.. code-block:: c++ + + int foo(int *A, char *B, int n, int k) { + for (int i = 0; i < n; ++i) + A[i] += 4 * B[i]; + } + +Global Structures Alias Analysis +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Access to global structures can also be vectorized, with alias analysis being +used to make sure accesses don't alias. Run-time checks can also be added on +pointer access to structure members. + +Many variations are supported, but some that rely on undefined behaviour being +ignored (as other compilers do) are still being left un-vectorized. + +.. 
code-block:: c++ + + struct { int A[100], K, B[100]; } Foo; + + int foo() { + for (int i = 0; i < 100; ++i) + Foo.A[i] = Foo.B[i] + 100; + } + +Vectorization of function calls +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Loop Vectorizer can vectorize intrinsic math functions. +See the table below for a list of these functions. + ++-----+-----+---------+ +| pow | exp | exp2 | ++-----+-----+---------+ +| sin | cos | sqrt | ++-----+-----+---------+ +| log |log2 | log10 | ++-----+-----+---------+ +|fabs |floor| ceil | ++-----+-----+---------+ +|fma |trunc|nearbyint| ++-----+-----+---------+ +| | | fmuladd | ++-----+-----+---------+ + +The loop vectorizer knows about special instructions on the target and will +vectorize a loop containing a function call that maps to the instructions. For +example, the loop below will be vectorized on Intel x86 if the SSE4.1 roundps +instruction is available. + +.. code-block:: c++ + + void foo(float *f) { + for (int i = 0; i != 1024; ++i) + f[i] = floorf(f[i]); + } + +Partial unrolling during vectorization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Modern processors feature multiple execution units, and only programs that contain a +high degree of parallelism can fully utilize the entire width of the machine. +The Loop Vectorizer increases the instruction level parallelism (ILP) by +performing partial-unrolling of loops. + +In the example below the entire array is accumulated into the variable 'sum'. +This is inefficient because only a single execution port can be used by the processor. +By unrolling the code the Loop Vectorizer allows two or more execution ports +to be used simultaneously. + +.. code-block:: c++ + + int foo(int *A, int *B, int n) { + unsigned sum = 0; + for (int i = 0; i < n; ++i) + sum += A[i]; + return sum; + } + +The Loop Vectorizer uses a cost model to decide when it is profitable to unroll loops. +The decision to unroll the loop depends on the register pressure and the generated code size.
+ +Performance +----------- + +This section shows the execution time of Clang on a simple benchmark: +`gcc-loops `_. +This benchmark is a collection of loops from the GCC autovectorization +`page `_ by Dorit Nuzman. + +The chart below compares GCC-4.7, ICC-13, and Clang-SVN with and without loop vectorization at -O3, tuned for "corei7-avx", running on a Sandybridge iMac. +The Y-axis shows the time in msec. Lower is better. The last column shows the geomean of all the kernels. + +.. image:: gcc-loops.png + +And Linpack-pc with the same configuration. Result is Mflops, higher is better. + +.. image:: linpack-pc.png + +.. _bb-vectorizer: + +The Basic Block Vectorizer +========================== + +Usage +------ + +The Basic Block Vectorizer is not enabled by default, but it can be enabled +through clang using the command line flag: + +.. code-block:: console + + $ clang -fslp-vectorize file.c + +Details +------- + +The goal of basic-block vectorization (a.k.a. superword-level parallelism) is +to combine similar independent instructions within simple control-flow regions +into vector instructions. Memory accesses, arithmetic operations, comparison +operations and some math functions can all be vectorized using this technique +(subject to the capabilities of the target architecture). + +For example, the following function performs very similar operations on its +inputs (a1, b1) and (a2, b2). The basic-block vectorizer may combine these +into vector operations. + +.. code-block:: c++ + + int foo(int a1, int a2, int b1, int b2) { + int r1 = a1*(a1 + b1)/b1 + 50*b1/a1; + int r2 = a2*(a2 + b2)/b2 + 50*b2/a2; + return r1 + r2; + } + + diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html deleted file mode 100644 index b7fdce490472..000000000000 --- a/docs/WritingAnLLVMBackend.html +++ /dev/null @@ -1,2557 +0,0 @@ - - - - - Writing an LLVM Compiler Backend - - - - - -

- Writing an LLVM Compiler Backend -

- -
    -
  1. Introduction - -
  2. Target Machine
  3. -
  4. Target Registration
  5. -
  6. Register Set and Register Classes -
  7. -
  8. Instruction Set -
  9. -
  10. Instruction Selector -
  11. -
  12. Assembly Printer
  13. -
  14. Subtarget Support
  15. -
  16. JIT Support -
  17. -
- -
-

Written by Mason Woo and - Misha Brukman

-
- - -

- Introduction -

- - -
- -

-This document describes techniques for writing compiler backends that convert -the LLVM Intermediate Representation (IR) to code for a specified machine or -other languages. Code intended for a specific machine can take the form of -either assembly code or binary code (usable for a JIT compiler). -

- -

-The backend of LLVM features a target-independent code generator that may create -output for several types of target CPUs — including X86, PowerPC, ARM, -and SPARC. The backend may also be used to generate code targeted at SPUs of the -Cell processor or GPUs to support the execution of compute kernels. -

- -

-The document focuses on existing examples found in subdirectories -of llvm/lib/Target in a downloaded LLVM release. In particular, this -document focuses on the example of creating a static compiler (one that emits -text assembly) for a SPARC target, because SPARC has fairly standard -characteristics, such as a RISC instruction set and straightforward calling -conventions. -

- -

- Audience -

- -
- -

-The audience for this document is anyone who needs to write an LLVM backend to -generate code for a specific hardware or software target. -

- -
- -

- Prerequisite Reading -

- -
- -

-These essential documents must be read before reading this document: -

- -
    -
  • LLVM Language Reference - Manual — a reference manual for the LLVM assembly language.
  • - -
  • The LLVM - Target-Independent Code Generator — a guide to the components - (classes and code generation algorithms) for translating the LLVM internal - representation into machine code for a specified target. Pay particular - attention to the descriptions of code generation stages: Instruction - Selection, Scheduling and Formation, SSA-based Optimization, Register - Allocation, Prolog/Epilog Code Insertion, Late Machine Code Optimizations, - and Code Emission.
  • - -
  • TableGen - Fundamentals —a document that describes the TableGen - (tblgen) application that manages domain-specific information to - support LLVM code generation. TableGen processes input from a target - description file (.td suffix) and generates C++ code that can be - used for code generation.
  • - -
  • Writing an LLVM - Pass — The assembly printer is a FunctionPass, as are - several SelectionDAG processing steps.
  • -
- -

-To follow the SPARC examples in this document, have a copy of -The SPARC Architecture -Manual, Version 8 for reference. For details about the ARM instruction -set, refer to the ARM Architecture -Reference Manual. For more about the GNU Assembler format -(GAS), see -Using As, -especially for the assembly printer. Using As contains a list of target -machine dependent features. -

- -
- -

- Basic Steps -

- -
- -

-To write a compiler backend for LLVM that converts the LLVM IR to code for a -specified target (machine or other language), follow these steps: -

- -
    -
  • Create a subclass of the TargetMachine class that describes characteristics - of your target machine. Copy existing examples of specific TargetMachine - class and header files; for example, start with - SparcTargetMachine.cpp and SparcTargetMachine.h, but - change the file names for your target. Similarly, change code that - references "Sparc" to reference your target.
  • - -
  • Describe the register set of the target. Use TableGen to generate code for - register definition, register aliases, and register classes from a - target-specific RegisterInfo.td input file. You should also write - additional code for a subclass of the TargetRegisterInfo class that - represents the class register file data used for register allocation and - also describes the interactions between registers.
  • - -
  • Describe the instruction set of the target. Use TableGen to generate code - for target-specific instructions from target-specific versions of - TargetInstrFormats.td and TargetInstrInfo.td. You should - write additional code for a subclass of the TargetInstrInfo class to - represent machine instructions supported by the target machine.
  • - -
  • Describe the selection and conversion of the LLVM IR from a Directed Acyclic - Graph (DAG) representation of instructions to native target-specific - instructions. Use TableGen to generate code that matches patterns and - selects instructions based on additional information in a target-specific - version of TargetInstrInfo.td. Write code - for XXXISelDAGToDAG.cpp, where XXX identifies the specific target, - to perform pattern matching and DAG-to-DAG instruction selection. Also write - code in XXXISelLowering.cpp to replace or remove operations and - data types that are not supported natively in a SelectionDAG.
  • - -
  • Write code for an assembly printer that converts LLVM IR to a GAS format for - your target machine. You should add assembly strings to the instructions - defined in your target-specific version of TargetInstrInfo.td. You - should also write code for a subclass of AsmPrinter that performs the - LLVM-to-assembly conversion and a trivial subclass of TargetAsmInfo.
  • - -
  • Optionally, add support for subtargets (i.e., variants with different - capabilities). You should also write code for a subclass of the - TargetSubtarget class, which allows you to use the -mcpu= - and -mattr= command-line options.
  • - -
  • Optionally, add JIT support and create a machine code emitter (subclass of - TargetJITInfo) that is used to emit binary code directly into memory.
  • -
- -

-In the .cpp and .h. files, initially stub up these methods and -then implement them later. Initially, you may not know which private members -that the class will need and which components will need to be subclassed. -

- -
- -

- Preliminaries -

- -
- -

-To actually create your compiler backend, you need to create and modify a few -files. The absolute minimum is discussed here. But to actually use the LLVM -target-independent code generator, you must perform the steps described in -the LLVM -Target-Independent Code Generator document. -

- -

-First, you should create a subdirectory under lib/Target to hold all -the files related to your target. If your target is called "Dummy," create the -directory lib/Target/Dummy. -

- -

-In this new -directory, create a Makefile. It is easiest to copy a -Makefile of another target and modify it. It should at least contain -the LEVEL, LIBRARYNAME and TARGET variables, and then -include $(LEVEL)/Makefile.common. The library can be -named LLVMDummy (for example, see the MIPS target). Alternatively, you -can split the library into LLVMDummyCodeGen -and LLVMDummyAsmPrinter, the latter of which should be implemented in a -subdirectory below lib/Target/Dummy (for example, see the PowerPC -target). -

- -

-Note that these two naming schemes are hardcoded into llvm-config. -Using any other naming scheme will confuse llvm-config and produce a -lot of (seemingly unrelated) linker errors when linking llc. -

- -

-To make your target actually do something, you need to implement a subclass of -TargetMachine. This implementation should typically be in the file -lib/Target/DummyTargetMachine.cpp, but any file in -the lib/Target directory will be built and should work. To use LLVM's -target independent code generator, you should do what all current machine -backends do: create a subclass of LLVMTargetMachine. (To create a -target from scratch, create a subclass of TargetMachine.) -

- -

-To get LLVM to actually build and link your target, you need to add it to -the TARGETS_TO_BUILD variable. To do this, you modify the configure -script to know about your target when parsing the --enable-targets -option. Search the configure script for TARGETS_TO_BUILD, add your -target to the lists there (some creativity required), and then -reconfigure. Alternatively, you can change autotools/configure.ac and -regenerate configure by running ./autoconf/AutoRegen.sh. -

- -
- -
- - -

- Target Machine -

- - -
- -

-LLVMTargetMachine is designed as a base class for targets implemented -with the LLVM target-independent code generator. The LLVMTargetMachine -class should be specialized by a concrete target class that implements the -various virtual methods. LLVMTargetMachine is defined as a subclass of -TargetMachine in include/llvm/Target/TargetMachine.h. The -TargetMachine class implementation (TargetMachine.cpp) also -processes numerous command-line options. -

- -

-To create a concrete target-specific subclass of LLVMTargetMachine, -start by copying an existing TargetMachine class and header. You -should name the files that you create to reflect your specific target. For -instance, for the SPARC target, name the files SparcTargetMachine.h and -SparcTargetMachine.cpp. -

- -

-For a target machine XXX, the implementation of -XXXTargetMachine must have access methods to obtain objects that -represent target components. These methods are named get*Info, and are -intended to obtain the instruction set (getInstrInfo), register set -(getRegisterInfo), stack frame layout (getFrameInfo), and -similar information. XXXTargetMachine must also implement the -getDataLayout method to access an object with target-specific data -characteristics, such as data type size and alignment requirements. -

- -

-For instance, for the SPARC target, the header file -SparcTargetMachine.h declares prototypes for several get*Info -and getDataLayout methods that simply return a class member. -

- -
-
-namespace llvm {
-
-class Module;
-
-class SparcTargetMachine : public LLVMTargetMachine {
-  const DataLayout DataLayout;       // Calculates type size & alignment
-  SparcSubtarget Subtarget;
-  SparcInstrInfo InstrInfo;
-  TargetFrameInfo FrameInfo;
-  
-protected:
-  virtual const TargetAsmInfo *createTargetAsmInfo() const;
-  
-public:
-  SparcTargetMachine(const Module &M, const std::string &FS);
-
-  virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; }
-  virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; }
-  virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; }
-  virtual const TargetRegisterInfo *getRegisterInfo() const {
-    return &InstrInfo.getRegisterInfo();
-  }
-  virtual const DataLayout *getDataLayout() const { return &DataLayout; }
-  static unsigned getModuleMatchQuality(const Module &M);
-
-  // Pass Pipeline Configuration
-  virtual bool addInstSelector(PassManagerBase &PM, bool Fast);
-  virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast);
-};
-
-} // end namespace llvm
-
-
- -
    -
  • getInstrInfo()
  • -
  • getRegisterInfo()
  • -
  • getFrameInfo()
  • -
  • getDataLayout()
  • -
  • getSubtargetImpl()
  • -
- -

For some targets, you also need to support the following methods:

- -
    -
  • getTargetLowering()
  • -
  • getJITInfo()
  • -
- -

-In addition, the XXXTargetMachine constructor should specify a -TargetDescription string that determines the data layout for the target -machine, including characteristics such as pointer size, alignment, and -endianness. For example, the constructor for SparcTargetMachine contains the -following: -

- -
-
-SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
-  : DataLayout("E-p:32:32-f128:128:128"),
-    Subtarget(M, FS), InstrInfo(Subtarget),
-    FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
-}
-
-
- -

Hyphens separate portions of the TargetDescription string.

- -
    -
  • An upper-case "E" in the string indicates a big-endian target data - model. a lower-case "e" indicates little-endian.
  • - -
  • "p:" is followed by pointer information: size, ABI alignment, and - preferred alignment. If only two figures follow "p:", then the - first value is pointer size, and the second value is both ABI and preferred - alignment.
  • - -
  • Then a letter for numeric type alignment: "i", "f", - "v", or "a" (corresponding to integer, floating point, - vector, or aggregate). "i", "v", or "a" are - followed by ABI alignment and preferred alignment. "f" is followed - by three values: the first indicates the size of a long double, then ABI - alignment, and then ABI preferred alignment.
  • -
- -
- - -

- Target Registration -

- - -
- -

-You must also register your target with the TargetRegistry, which is -what other LLVM tools use to be able to lookup and use your target at -runtime. The TargetRegistry can be used directly, but for most targets -there are helper templates which should take care of the work for you.

- -

-All targets should declare a global Target object which is used to -represent the target during registration. Then, in the target's TargetInfo -library, the target should define that object and use -the RegisterTarget template to register the target. For example, the Sparc registration code looks like this: -

- -
-
-Target llvm::TheSparcTarget;
-
-extern "C" void LLVMInitializeSparcTargetInfo() { 
-  RegisterTarget<Triple::sparc, /*HasJIT=*/false>
-    X(TheSparcTarget, "sparc", "Sparc");
-}
-
-
- -

-This allows the TargetRegistry to look up the target by name or by -target triple. In addition, most targets will also register additional features -which are available in separate libraries. These registration steps are -separate, because some clients may wish to only link in some parts of the target --- the JIT code generator does not require the use of the assembler printer, for -example. Here is an example of registering the Sparc assembly printer: -

- -
-
-extern "C" void LLVMInitializeSparcAsmPrinter() { 
-  RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
-}
-
-
- -

-For more information, see -"llvm/Target/TargetRegistry.h". -

- -
- - -

- Register Set and Register Classes -

- - -
- -

-You should describe a concrete target-specific class that represents the -register file of a target machine. This class is called XXXRegisterInfo -(where XXX identifies the target) and represents the class register -file data that is used for register allocation. It also describes the -interactions between registers. -

- -

-You also need to define register classes to categorize related registers. A -register class should be added for groups of registers that are all treated the -same way for some instruction. Typical examples are register classes for -integer, floating-point, or vector registers. A register allocator allows an -instruction to use any register in a specified register class to perform the -instruction in a similar manner. Register classes allocate virtual registers to -instructions from these sets, and register classes let the target-independent -register allocator automatically choose the actual registers. -

- -

-Much of the code for registers, including register definition, register aliases, -and register classes, is generated by TableGen from XXXRegisterInfo.td -input files and placed in XXXGenRegisterInfo.h.inc and -XXXGenRegisterInfo.inc output files. Some of the code in the -implementation of XXXRegisterInfo requires hand-coding. -

- - -

- Defining a Register -

- -
- -

-The XXXRegisterInfo.td file typically starts with register definitions -for a target machine. The Register class (specified -in Target.td) is used to define an object for each register. The -specified string n becomes the Name of the register. The -basic Register object does not have any subregisters and does not -specify any aliases. -

- -
-
-class Register<string n> {
-  string Namespace = "";
-  string AsmName = n;
-  string Name = n;
-  int SpillSize = 0;
-  int SpillAlignment = 0;
-  list<Register> Aliases = [];
-  list<Register> SubRegs = [];
-  list<int> DwarfNumbers = [];
-}
-
-
- -

-For example, in the X86RegisterInfo.td file, there are register -definitions that utilize the Register class, such as: -

- -
-
-def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>;
-
-
- -

-This defines the register AL and assigns it values (with -DwarfRegNum) that are used by gcc, gdb, or a debug -information writer to identify a register. For register -AL, DwarfRegNum takes an array of 3 values representing 3 -different modes: the first element is for X86-64, the second for exception -handling (EH) on X86-32, and the third is generic. -1 is a special Dwarf number -that indicates the gcc number is undefined, and -2 indicates the register number -is invalid for this mode. -

- -

-From the previously described line in the X86RegisterInfo.td file, -TableGen generates this code in the X86GenRegisterInfo.inc file: -

- -
-
-static const unsigned GR8[] = { X86::AL, ... };
-
-const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 };
-
-const TargetRegisterDesc RegisterDescriptors[] = { 
-  ...
-{ "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ...
-
-
- -

-From the register info file, TableGen generates a TargetRegisterDesc -object for each register. TargetRegisterDesc is defined in -include/llvm/Target/TargetRegisterInfo.h with the following fields: -

- -
-
-struct TargetRegisterDesc {
-  const char     *AsmName;      // Assembly language name for the register
-  const char     *Name;         // Printable name for the reg (for debugging)
-  const unsigned *AliasSet;     // Register Alias Set
-  const unsigned *SubRegs;      // Sub-register set
-  const unsigned *ImmSubRegs;   // Immediate sub-register set
-  const unsigned *SuperRegs;    // Super-register set
-};
-
- -

-TableGen uses the entire target description file (.td) to determine -text names for the register (in the AsmName and Name fields of -TargetRegisterDesc) and the relationships of other registers to the -defined register (in the other TargetRegisterDesc fields). In this -example, other definitions establish the registers "AX", -"EAX", and "RAX" as aliases for one another, so TableGen -generates a null-terminated array (AL_AliasSet) for this register alias -set. -

- -

-The Register class is commonly used as a base class for more complex -classes. In Target.td, the Register class is the base for the -RegisterWithSubRegs class that is used to define registers that need to -specify subregisters in the SubRegs list, as shown here: -

- -
-
-class RegisterWithSubRegs<string n,
-list<Register> subregs> : Register<n> {
-  let SubRegs = subregs;
-}
-
-
- -

-In SparcRegisterInfo.td, additional register classes are defined for -SPARC: a Register subclass, SparcReg, and further subclasses: Ri, -Rf, and Rd. SPARC registers are identified by 5-bit ID -numbers, which is a feature common to these subclasses. Note the use of -'let' expressions to override values that are initially defined in a -superclass (such as SubRegs field in the Rd class). -

- -
-
-class SparcReg<string n> : Register<n> {
-  field bits<5> Num;
-  let Namespace = "SP";
-}
-// Ri - 32-bit integer registers
-class Ri<bits<5> num, string n> :
-SparcReg<n> {
-  let Num = num;
-}
-// Rf - 32-bit floating-point registers
-class Rf<bits<5> num, string n> :
-SparcReg<n> {
-  let Num = num;
-}
-// Rd - Slots in the FP register file for 64-bit
-floating-point values.
-class Rd<bits<5> num, string n,
-list<Register> subregs> : SparcReg<n> {
-  let Num = num;
-  let SubRegs = subregs;
-}
-
-
- -

-In the SparcRegisterInfo.td file, there are register definitions that -utilize these subclasses of Register, such as: -

- -
-
-def G0 : Ri< 0, "G0">,
-DwarfRegNum<[0]>;
-def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
-...
-def F0 : Rf< 0, "F0">,
-DwarfRegNum<[32]>;
-def F1 : Rf< 1, "F1">,
-DwarfRegNum<[33]>;
-...
-def D0 : Rd< 0, "F0", [F0, F1]>,
-DwarfRegNum<[32]>;
-def D1 : Rd< 2, "F2", [F2, F3]>,
-DwarfRegNum<[34]>;
-
-
- -

-The last two registers shown above (D0 and D1) are -double-precision floating-point registers that are aliases for pairs of -single-precision floating-point sub-registers. In addition to aliases, the -sub-register and super-register relationships of the defined register are in -fields of a register's TargetRegisterDesc. -

- -
- - -

- Defining a Register Class -

- -
- -

-The RegisterClass class (specified in Target.td) is used to -define an object that represents a group of related registers and also defines -the default allocation order of the registers. A target description file -XXXRegisterInfo.td that uses Target.td can construct register -classes using the following class: -

- -
-
-class RegisterClass<string namespace,
-list<ValueType> regTypes, int alignment, dag regList> {
-  string Namespace = namespace;
-  list<ValueType> RegTypes = regTypes;
-  int Size = 0;  // spill size, in bits; zero lets tblgen pick the size
-  int Alignment = alignment;
-
-  // CopyCost is the cost of copying a value between two registers
-  // default value 1 means a single instruction
-  // A negative value means copying is extremely expensive or impossible
-  int CopyCost = 1;  
-  dag MemberList = regList;
-  
-  // for register classes that are subregisters of this class
-  list<RegisterClass> SubRegClassList = [];  
-  
-  code MethodProtos = [{}];  // to insert arbitrary code
-  code MethodBodies = [{}];
-}
-
-
- -

To define a RegisterClass, use the following 4 arguments:

- -
    -
  • The first argument of the definition is the name of the namespace.
  • - -
  • The second argument is a list of ValueType register type values - that are defined in include/llvm/CodeGen/ValueTypes.td. Defined - values include integer types (such as i16, i32, - and i1 for Boolean), floating-point types - (f32, f64), and vector types (for example, v8i16 - for an 8 x i16 vector). All registers in a RegisterClass - must have the same ValueType, but some registers may store vector - data in different configurations. For example a register that can process a - 128-bit vector may be able to handle 16 8-bit integer elements, 8 16-bit - integers, 4 32-bit integers, and so on.
  • - -
  • The third argument of the RegisterClass definition specifies the - alignment required of the registers when they are stored or loaded to - memory.
  • - -
  • The final argument, regList, specifies which registers are in this - class. If an alternative allocation order method is not specified, then - regList also defines the order of allocation used by the register - allocator. Besides simply listing registers with (add R0, R1, ...), - more advanced set operators are available. See - include/llvm/Target/Target.td for more information.
  • -
- -

-In SparcRegisterInfo.td, three RegisterClass objects are defined: -FPRegs, DFPRegs, and IntRegs. For all three register -classes, the first argument defines the namespace with the string -'SP'. FPRegs defines a group of 32 single-precision -floating-point registers (F0 to F31); DFPRegs defines -a group of 16 double-precision registers -(D0-D15). -

- -
-
-// F0, F1, F2, ..., F31
-def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
-
-def DFPRegs : RegisterClass<"SP", [f64], 64,
-                            (add D0, D1, D2, D3, D4, D5, D6, D7, D8,
-                                 D9, D10, D11, D12, D13, D14, D15)>;
- 
-def IntRegs : RegisterClass<"SP", [i32], 32,
-    (add L0, L1, L2, L3, L4, L5, L6, L7,
-         I0, I1, I2, I3, I4, I5,
-         O0, O1, O2, O3, O4, O5, O7,
-         G1,
-         // Non-allocatable regs:
-         G2, G3, G4,
-         O6,        // stack ptr
-         I6,        // frame ptr
-         I7,        // return address
-         G0,        // constant zero
-         G5, G6, G7 // reserved for kernel
-    )>;
-
-
- -

-Using SparcRegisterInfo.td with TableGen generates several output files -that are intended for inclusion in other source code that you write. -SparcRegisterInfo.td generates SparcGenRegisterInfo.h.inc, -which should be included in the header file for the SPARC -register implementation that you write (SparcRegisterInfo.h). In -SparcGenRegisterInfo.h.inc a new structure is defined called -SparcGenRegisterInfo that uses TargetRegisterInfo as its -base. It also specifies types, based upon the defined register -classes: DFPRegsClass, FPRegsClass, and IntRegsClass. -

- -

-SparcRegisterInfo.td also generates SparcGenRegisterInfo.inc, -which is included at the bottom of SparcRegisterInfo.cpp, the SPARC -register implementation. The code below shows only the generated integer -registers and associated register classes. The order of registers -in IntRegs reflects the order in the definition of IntRegs in -the target description file. -

- -
-
  // IntRegs Register Class...
-  static const unsigned IntRegs[] = {
-    SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5,
-    SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3,
-    SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3,
-    SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3,
-    SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5,
-    SP::G6, SP::G7, 
-  };
-
-  // IntRegsVTs Register Class Value Types...
-  static const MVT::ValueType IntRegsVTs[] = {
-    MVT::i32, MVT::Other
-  };
-
-namespace SP {   // Register class instances
-  DFPRegsClass    DFPRegsRegClass;
-  FPRegsClass     FPRegsRegClass;
-  IntRegsClass    IntRegsRegClass;
-...
-  // IntRegs Sub-register Classess...
-  static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
-    NULL
-  };
-...
-  // IntRegs Super-register Classess...
-  static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
-    NULL
-  };
-...
-  // IntRegs Register Class sub-classes...
-  static const TargetRegisterClass* const IntRegsSubclasses [] = {
-    NULL
-  };
-...
-  // IntRegs Register Class super-classes...
-  static const TargetRegisterClass* const IntRegsSuperclasses [] = {
-    NULL
-  };
-
-  IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID, 
-    IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses, 
-    IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
-}
-
-
- -

-The register allocators will avoid using reserved registers, and callee saved -registers are not used until all the volatile registers have been used. That -is usually good enough, but in some cases it may be necessary to provide custom -allocation orders. -

- -
- - -

- Implement a subclass of - TargetRegisterInfo -

- -
- -

-The final step is to hand code portions of XXXRegisterInfo, which -implements the interface described in TargetRegisterInfo.h. These -functions return 0, NULL, or false, unless -overridden. Here is a list of functions that are overridden for the SPARC -implementation in SparcRegisterInfo.cpp: -

- -
    -
  • getCalleeSavedRegs — Returns a list of callee-saved registers - in the order of the desired callee-save stack frame offset.
  • - -
  • getReservedRegs — Returns a bitset indexed by physical - register numbers, indicating if a particular register is unavailable.
  • - -
  • hasFP — Return a Boolean indicating if a function should have - a dedicated frame pointer register.
  • - -
  • eliminateCallFramePseudoInstr — If call frame setup or - destroy pseudo instructions are used, this can be called to eliminate - them.
  • - -
  • eliminateFrameIndex — Eliminate abstract frame indices from - instructions that may use them.
  • - -
  • emitPrologue — Insert prologue code into the function.
  • - -
  • emitEpilogue — Insert epilogue code into the function.
  • -
- -
- -
- - -

- Instruction Set -

- - -
- -

-During the early stages of code generation, the LLVM IR code is converted to a -SelectionDAG with nodes that are instances of the SDNode class -containing target instructions. An SDNode has an opcode, operands, type -requirements, and operation properties. For example, the operation properties indicate whether an operation is -commutative or whether it loads from memory. The various operation node -types are described in the include/llvm/CodeGen/SelectionDAGNodes.h -file (values of the NodeType enum in the ISD namespace). -

- -

-TableGen uses the following target description (.td) input files to -generate much of the code for instruction definition: -

- -
    -
  • Target.td — Where the Instruction, Operand, - InstrInfo, and other fundamental classes are defined.
  • - -
  • TargetSelectionDAG.td — Used by SelectionDAG - instruction selection generators, contains SDTC* classes (selection - DAG type constraint), definitions of SelectionDAG nodes (such as - imm, cond, bb, add, fadd, - sub), and pattern support (Pattern, Pat, - PatFrag, PatLeaf, ComplexPattern).
  • - -
  • XXXInstrFormats.td — Patterns for definitions of - target-specific instructions.
  • - -
  • XXXInstrInfo.td — Target-specific definitions of instruction - templates, condition codes, and instructions of an instruction set. For - architecture modifications, a different file name may be used. For example, - for Pentium with SSE instruction, this file is X86InstrSSE.td, and - for Pentium with MMX, this file is X86InstrMMX.td.
  • -
- -

-There is also a target-specific XXX.td file, where XXX is the -name of the target. The XXX.td file includes the other .td -input files, but its contents are only directly important for subtargets. -

- -

-You should describe a concrete target-specific class XXXInstrInfo that -represents machine instructions supported by a target machine. -XXXInstrInfo contains an array of XXXInstrDescriptor objects, -each of which describes one instruction. An instruction descriptor defines:

- -
    -
  • Opcode mnemonic
  • - -
  • Number of operands
  • - -
  • List of implicit register definitions and uses
  • - -
  • Target-independent properties (such as memory access, is commutable)
  • - -
  • Target-specific flags
  • -
- -

-The Instruction class (defined in Target.td) is mostly used as a base -for more complex instruction classes. -

- -
-
class Instruction {
-  string Namespace = "";
-  dag OutOperandList;       // An dag containing the MI def operand list.
-  dag InOperandList;        // An dag containing the MI use operand list.
-  string AsmString = "";    // The .s format to print the instruction with.
-  list<dag> Pattern;  // Set to the DAG pattern for this instruction
-  list<Register> Uses = []; 
-  list<Register> Defs = [];
-  list<Predicate> Predicates = [];  // predicates turned into isel match code
-  ... remainder not shown for space ...
-}
-
-
- -

-A SelectionDAG node (SDNode) should contain an object -representing a target-specific instruction that is defined -in XXXInstrInfo.td. The instruction objects should represent -instructions from the architecture manual of the target machine (such as the -SPARC Architecture Manual for the SPARC target). -

- -

-A single instruction from the architecture manual is often modeled as multiple -target instructions, depending upon its operands. For example, a manual might -describe an add instruction that takes a register or an immediate operand. An -LLVM target could model this with two instructions named ADDri and -ADDrr. -

- -

-You should define a class for each instruction category and define each opcode -as a subclass of the category with appropriate parameters such as the fixed -binary encoding of opcodes and extended opcodes. You should map the register -bits to the bits of the instruction in which they are encoded (for the -JIT). Also you should specify how the instruction should be printed when the -automatic assembly printer is used. -

- -

-As is described in the SPARC Architecture Manual, Version 8, there are three -major 32-bit formats for instructions. Format 1 is only for the CALL -instruction. Format 2 is for branch on condition codes and SETHI (set -high bits of a register) instructions. Format 3 is for other instructions. -

- -

-Each of these formats has corresponding classes in SparcInstrFormat.td. -InstSP is a base class for other instruction classes. Additional base -classes are specified for more precise formats: for example -in SparcInstrFormat.td, F2_1 is for SETHI, -and F2_2 is for branches. There are three other base -classes: F3_1 for register/register operations, F3_2 for -register/immediate operations, and F3_3 for floating-point -operations. SparcInstrInfo.td also adds the base class Pseudo for -synthetic SPARC instructions. -

- -

-SparcInstrInfo.td largely consists of operand and instruction -definitions for the SPARC target. In SparcInstrInfo.td, the following -target description file entry, LDrr, defines the Load Integer -instruction for a Word (the LD SPARC opcode) from a memory address to a -register. The first parameter, the value 3 (binary 11), is the -operation value for this category of operation. The second parameter -(binary 000000) is the specific operation value -for LD/Load Word. The third parameter is the output destination, which -is a register operand and defined in the Register target description -file (IntRegs). -

- -
-
def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
-                 "ld [$addr], $dst",
-                 [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
-
-
- -

-The fourth parameter is the input source, which uses the address -operand MEMrr that is defined earlier in SparcInstrInfo.td: -

- -
-
def MEMrr : Operand<i32> {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-
- -

-The fifth parameter is a string that is used by the assembly printer and can be -left as an empty string until the assembly printer interface is implemented. The -sixth and final parameter is the pattern used to match the instruction during -the SelectionDAG Select Phase described in -(The LLVM -Target-Independent Code Generator). This parameter is detailed in the next -section, Instruction Selector. -

- -

-Instruction class definitions are not overloaded for different operand types, so -separate versions of instructions are needed for register, memory, or immediate -value operands. For example, to perform a Load Integer instruction for a Word -from an immediate operand to a register, the following instruction class is -defined: -

- -
-
def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
-                 "ld [$addr], $dst",
-                 [(set IntRegs:$dst, (load ADDRri:$addr))]>;
-
-
- -

-Writing these definitions for so many similar instructions can involve a lot of -cut and paste. In td files, the multiclass directive enables the -creation of templates to define several instruction classes at once (using -the defm directive). For example in SparcInstrInfo.td, the -multiclass pattern F3_12 is defined to create 2 instruction -classes each time F3_12 is invoked: -

- -
-
multiclass F3_12 <string OpcStr, bits<6> Op3Val, SDNode OpNode> {
-  def rr  : F3_1 <2, Op3Val, 
-                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
-                 !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
-  def ri  : F3_2 <2, Op3Val,
-                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
-                 !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
-}
-
-
- -

-So when the defm directive is used for the XOR -and ADD instructions, as seen below, it creates four instruction -objects: XORrr, XORri, ADDrr, and ADDri. -

- -
-
-defm XOR   : F3_12<"xor", 0b000011, xor>;
-defm ADD   : F3_12<"add", 0b000000, add>;
-
-
- -

-SparcInstrInfo.td also includes definitions for condition codes that -are referenced by branch instructions. The following definitions -in SparcInstrInfo.td indicate the bit location of the SPARC condition -code. For example, the 10th bit represents the 'greater than' -condition for integers, and the 22nd bit represents the 'greater -than' condition for floats. -

- -
-
-def ICC_NE  : ICC_VAL< 9>;  // Not Equal
-def ICC_E   : ICC_VAL< 1>;  // Equal
-def ICC_G   : ICC_VAL<10>;  // Greater
-...
-def FCC_U   : FCC_VAL<23>;  // Unordered
-def FCC_G   : FCC_VAL<22>;  // Greater
-def FCC_UG  : FCC_VAL<21>;  // Unordered or Greater
-...
-
-
- -

-(Note that Sparc.h also defines enums that correspond to the same SPARC -condition codes. Care must be taken to ensure the values in Sparc.h -correspond to the values in SparcInstrInfo.td. I.e., -SPCC::ICC_NE = 9, SPCC::FCC_U = 23 and so on.) -

- - -

- Instruction Operand Mapping -

- -
- -

-The code generator backend maps instruction operands to fields in the -instruction. Operands are assigned to unbound fields in the instruction in the -order they are defined. Fields are bound when they are assigned a value. For -example, the Sparc target defines the XNORrr instruction as -a F3_1 format instruction having three operands. -

- -
-
-def XNORrr  : F3_1<2, 0b000111,
-                   (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
-                   "xnor $b, $c, $dst",
-                   [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
-
-
- -

-The instruction templates in SparcInstrFormats.td show the base class -for F3_1 is InstSP. -

- -
-
-class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
-  field bits<32> Inst;
-  let Namespace = "SP";
-  bits<2> op;
-  let Inst{31-30} = op;       
-  dag OutOperandList = outs;
-  dag InOperandList = ins;
-  let AsmString   = asmstr;
-  let Pattern = pattern;
-}
-
-
- -

InstSP leaves the op field unbound.

- -
-
-class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
-    : InstSP<outs, ins, asmstr, pattern> {
-  bits<5> rd;
-  bits<6> op3;
-  bits<5> rs1;
-  let op{1} = 1;   // Op = 2 or 3
-  let Inst{29-25} = rd;
-  let Inst{24-19} = op3;
-  let Inst{18-14} = rs1;
-}
-
-
- -

-F3 binds the op field and defines the rd, -op3, and rs1 fields. F3 format instructions will -bind the operands to the rd, op3, and rs1 fields. -

- -
-
-class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
-           string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
-  bits<8> asi = 0; // asi not currently used
-  bits<5> rs2;
-  let op         = opVal;
-  let op3        = op3val;
-  let Inst{13}   = 0;     // i field = 0
-  let Inst{12-5} = asi;   // address space identifier
-  let Inst{4-0}  = rs2;
-}
-
-
- -

-F3_1 binds the op3 field and defines the rs2 -field. F3_1 format instructions will bind the operands to the rd, -rs1, and rs2 fields. This results in the XNORrr -instruction binding the $dst, $b, and $c operands to -the rd, rs1, and rs2 fields respectively. -

- -
- - -

- Instruction Relation Mapping -

- -
- -

-This TableGen feature is used to relate instructions with each other. It is -particularly useful when you have multiple instruction formats and need to -switch between them after instruction selection. This entire feature is driven -by relation models which can be defined in XXXInstrInfo.td files -according to the target-specific instruction set. Relation models are defined -using InstrMapping class as a base. TableGen parses all the models -and generates instruction relation maps using the specified information. -Relation maps are emitted as tables in the XXXGenInstrInfo.inc file -along with the functions to query them. For the detailed information on how to -use this feature, please refer to -How to add Instruction Mappings -document. -

-
- - -

- Implement a subclass of - TargetInstrInfo -

- -
- -

-The final step is to hand code portions of XXXInstrInfo, which -implements the interface described in TargetInstrInfo.h. These -functions return 0 or a Boolean or they assert, unless -overridden. Here's a list of functions that are overridden for the SPARC -implementation in SparcInstrInfo.cpp: -

- -
    -
  • isLoadFromStackSlot — If the specified machine instruction is - a direct load from a stack slot, return the register number of the - destination and the FrameIndex of the stack slot.
  • - -
  • isStoreToStackSlot — If the specified machine instruction is - a direct store to a stack slot, return the register number of the - source and the FrameIndex of the stack slot.
  • - -
  • copyPhysReg — Copy values between a pair of physical - registers.
  • - -
  • storeRegToStackSlot — Store a register value to a stack - slot.
  • - -
  • loadRegFromStackSlot — Load a register value from a stack - slot.
  • - -
  • storeRegToAddr — Store a register value to memory.
  • - -
  • loadRegFromAddr — Load a register value from memory.
  • - -
  • foldMemoryOperand — Attempt to combine instructions of any - load or store instruction for the specified operand(s).
  • -
- -
- - -

- Branch Folding and If Conversion -

-
- -

-Performance can be improved by combining instructions or by eliminating -instructions that are never reached. The AnalyzeBranch method -in XXXInstrInfo may be implemented to examine conditional instructions -and remove unnecessary instructions. AnalyzeBranch looks at the end of -a machine basic block (MBB) for opportunities for improvement, such as branch -folding and if conversion. The BranchFolder and IfConverter -machine function passes (see the source files BranchFolding.cpp and -IfConversion.cpp in the lib/CodeGen directory) call -AnalyzeBranch to improve the control flow graph that represents the -instructions. -

- -

-Several implementations of AnalyzeBranch (for ARM, Alpha, and X86) can -be examined as models for your own AnalyzeBranch implementation. Since -SPARC does not implement a useful AnalyzeBranch, the ARM target -implementation is shown below. -

- -

AnalyzeBranch returns a Boolean value and takes four parameters:

- -
    -
  • MachineBasicBlock &MBB — The incoming block to be - examined.
  • - -
  • MachineBasicBlock *&TBB — A destination block that is - returned. For a conditional branch that evaluates to true, TBB is - the destination.
  • - -
  • MachineBasicBlock *&FBB — For a conditional branch that - evaluates to false, FBB is returned as the destination.
  • - -
  • std::vector<MachineOperand> &Cond — List of - operands to evaluate a condition for a conditional branch.
  • -
- -

-In the simplest case, if a block ends without a branch, then it falls through to -the successor block. No destination blocks are specified for either TBB -or FBB, so both parameters return NULL. The start of -the AnalyzeBranch (see code below for the ARM target) shows the -function parameters and the code for the simplest case. -

- -
-
bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-        MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
-        std::vector<MachineOperand> &Cond) const
-{
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
-    return false;
-
-
- -

-If a block ends with a single unconditional branch instruction, then -AnalyzeBranch (shown below) should return the destination of that -branch in the TBB parameter. -

- -
-
-  if (LastOpc == ARM::B || LastOpc == ARM::tB) {
-    TBB = LastInst->getOperand(0).getMBB();
-    return false;
-  }
-
-
- -

-If a block ends with two unconditional branches, then the second branch is never -reached. In that situation, as shown below, remove the last branch instruction -and return the penultimate branch in the TBB parameter. -

- -
-
-  if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
-      (LastOpc == ARM::B || LastOpc == ARM::tB)) {
-    TBB = SecondLastInst->getOperand(0).getMBB();
-    I = LastInst;
-    I->eraseFromParent();
-    return false;
-  }
-
-
- -

-A block may end with a single conditional branch instruction that falls through -to successor block if the condition evaluates to false. In that case, -AnalyzeBranch (shown below) should return the destination of that -conditional branch in the TBB parameter and a list of operands in -the Cond parameter to evaluate the condition. -

- -
-
-  if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
-    // Block ends with fall-through condbranch.
-    TBB = LastInst->getOperand(0).getMBB();
-    Cond.push_back(LastInst->getOperand(1));
-    Cond.push_back(LastInst->getOperand(2));
-    return false;
-  }
-
-
- -

-If a block ends with both a conditional branch and an ensuing unconditional -branch, then AnalyzeBranch (shown below) should return the conditional -branch destination (assuming it corresponds to a conditional evaluation of -'true') in the TBB parameter and the unconditional branch -destination in the FBB (corresponding to a conditional evaluation of -'false'). A list of operands to evaluate the condition should be -returned in the Cond parameter. -

- -
-
-  unsigned SecondLastOpc = SecondLastInst->getOpcode();
-
-  if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
-      (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
-    TBB =  SecondLastInst->getOperand(0).getMBB();
-    Cond.push_back(SecondLastInst->getOperand(1));
-    Cond.push_back(SecondLastInst->getOperand(2));
-    FBB = LastInst->getOperand(0).getMBB();
-    return false;
-  }
-
-
- -

-For the last two cases (ending with a single conditional branch or ending with -one conditional and one unconditional branch), the operands returned in -the Cond parameter can be passed to methods of other instructions to -create new branches or perform other operations. An implementation -of AnalyzeBranch requires the helper methods RemoveBranch -and InsertBranch to manage subsequent operations. -

- -

-AnalyzeBranch should return false indicating success in most circumstances. -AnalyzeBranch should only return true when the method is stumped about what to -do, for example, if a block has three terminating branches. AnalyzeBranch may -return true if it encounters a terminator it cannot handle, such as an indirect -branch. -

- -
- -
- - -

- Instruction Selector -

- - -
- -

-LLVM uses a SelectionDAG to represent LLVM IR instructions, and nodes -of the SelectionDAG ideally represent native target -instructions. During code generation, instruction selection passes are performed -to convert non-native DAG instructions into native target-specific -instructions. The pass described in XXXISelDAGToDAG.cpp is used to -match patterns and perform DAG-to-DAG instruction selection. Optionally, a pass -may be defined (in XXXBranchSelector.cpp) to perform similar DAG-to-DAG -operations for branch instructions. Later, the code in -XXXISelLowering.cpp replaces or removes operations and data types not -supported natively (legalizes) in a SelectionDAG. -

- -

-TableGen generates code for instruction selection using the following target -description input files: -

- -
    -
  • XXXInstrInfo.td — Contains definitions of instructions in a - target-specific instruction set, generates XXXGenDAGISel.inc, which - is included in XXXISelDAGToDAG.cpp.
  • - -
  • XXXCallingConv.td — Contains the calling and return value - conventions for the target architecture, and it generates - XXXGenCallingConv.inc, which is included in - XXXISelLowering.cpp.
  • -
- -

-The implementation of an instruction selection pass must include a header that -declares the FunctionPass class or a subclass of FunctionPass. In -XXXTargetMachine.cpp, a Pass Manager (PM) should add each instruction -selection pass into the queue of passes to run. -

- -

-The LLVM static compiler (llc) is an excellent tool for visualizing the -contents of DAGs. To display the SelectionDAG before or after specific -processing phases, use the command line options for llc, described -at -SelectionDAG Instruction Selection Process. -

- -

-To describe instruction selector behavior, you should add patterns for lowering -LLVM code into a SelectionDAG as the last parameter of the instruction -definitions in XXXInstrInfo.td. For example, in -SparcInstrInfo.td, this entry defines a register store operation, and -the last parameter describes a pattern with the store DAG operator. -

- -
-
-def STrr  : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
-                 "st $src, [$addr]", [(store IntRegs:$src, ADDRrr:$addr)]>;
-
-
- -

-ADDRrr is a memory mode that is also defined in -SparcInstrInfo.td: -

- -
-
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-
-
- -

-The definition of ADDRrr refers to SelectADDRrr, which is a -function defined in an implementation of the Instruction Selector (such -as SparcISelDAGToDAG.cpp). -

- -

-In lib/Target/TargetSelectionDAG.td, the DAG operator for store is -defined below: -

- -
-
-def store : PatFrag<(ops node:$val, node:$ptr),
-                    (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
-    return !ST->isTruncatingStore() && 
-           ST->getAddressingMode() == ISD::UNINDEXED;
-  return false;
-}]>;
-
-
- -

-XXXInstrInfo.td also generates (in XXXGenDAGISel.inc) the -SelectCode method that is used to call the appropriate processing -method for an instruction. In this example, SelectCode -calls Select_ISD_STORE for the ISD::STORE opcode. -

- -
-
-SDNode *SelectCode(SDValue N) {
-  ... 
-  MVT::ValueType NVT = N.getNode()->getValueType(0);
-  switch (N.getOpcode()) {
-  case ISD::STORE: {
-    switch (NVT) {
-    default:
-      return Select_ISD_STORE(N);
-      break;
-    }
-    break;
-  }
-  ...
-
-
- -

-The pattern for STrr is matched, so elsewhere in -XXXGenDAGISel.inc, code for STrr is created for -Select_ISD_STORE. The Emit_22 method is also generated -in XXXGenDAGISel.inc to complete the processing of this -instruction. -

- -
-
-SDNode *Select_ISD_STORE(const SDValue &N) {
-  SDValue Chain = N.getOperand(0);
-  if (Predicate_store(N.getNode())) {
-    SDValue N1 = N.getOperand(1);
-    SDValue N2 = N.getOperand(2);
-    SDValue CPTmp0;
-    SDValue CPTmp1;
-
-    // Pattern: (st:void IntRegs:i32:$src, 
-    //           ADDRrr:i32:$addr)<<P:Predicate_store>>
-    // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
-    // Pattern complexity = 13  cost = 1  size = 0
-    if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) &&
-        N1.getNode()->getValueType(0) == MVT::i32 &&
-        N2.getNode()->getValueType(0) == MVT::i32) {
-      return Emit_22(N, SP::STrr, CPTmp0, CPTmp1);
-    }
-...
-
-
- - -

- The SelectionDAG Legalize Phase -

- -
- -

-The Legalize phase converts a DAG to use types and operations that are natively -supported by the target. For natively unsupported types and operations, you need -to add code to the target-specific XXXTargetLowering implementation to convert -unsupported types and operations to supported ones. -

- -

-In the constructor for the XXXTargetLowering class, first use the -addRegisterClass method to specify which types are supported and which -register classes are associated with them. The code for the register classes is -generated by TableGen from XXXRegisterInfo.td and placed -in XXXGenRegisterInfo.h.inc. For example, the implementation of the -constructor for the SparcTargetLowering class (in -SparcISelLowering.cpp) starts with the following code: -

- -
-
-addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
-addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
-addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); 
-
-
- -

-You should examine the node types in the ISD namespace -(include/llvm/CodeGen/SelectionDAGNodes.h) and determine which -operations the target natively supports. For operations that do not have -native support, add a callback to the constructor for the XXXTargetLowering -class, so the instruction selection process knows what to do. The TargetLowering -class callback methods (declared in llvm/Target/TargetLowering.h) are: -

- -
    -
  • setOperationAction — General operation.
  • - -
  • setLoadExtAction — Load with extension.
  • - -
  • setTruncStoreAction — Truncating store.
  • - -
  • setIndexedLoadAction — Indexed load.
  • - -
  • setIndexedStoreAction — Indexed store.
  • - -
  • setConvertAction — Type conversion.
  • - -
  • setCondCodeAction — Support for a given condition code.
  • -
- -

-Note: on older releases, setLoadXAction is used instead -of setLoadExtAction. Also, on older releases, -setCondCodeAction may not be supported. Examine your release -to see what methods are specifically supported. -

- -

-These callbacks are used to determine that an operation does or does not work -with a specified type (or types). And in all cases, the third parameter is -a LegalAction type enum value: Promote, Expand, -Custom, or Legal. SparcISelLowering.cpp -contains examples of all four LegalAction values. -

- - -

- Promote -

- -
- -

-For an operation without native support for a given type, the specified type may -be promoted to a larger type that is supported. For example, SPARC does not -support a sign-extending load for Boolean values (i1 type), so -in SparcISelLowering.cpp the third parameter below, Promote, -changes i1 type values to a larger type before loading. -

- -
-
-setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
-
-
- -
- - -

- Expand -

- -
- -

-For a type without native support, a value may need to be broken down further, -rather than promoted. For an operation without native support, a combination of -other operations may be used to similar effect. In SPARC, the floating-point -sine and cosine trig operations are supported by expansion to other operations, -as indicated by the third parameter, Expand, to -setOperationAction: -

- -
-
-setOperationAction(ISD::FSIN, MVT::f32, Expand);
-setOperationAction(ISD::FCOS, MVT::f32, Expand);
-
-
- -
- - -

- Custom -

- -
- -

-For some operations, simple type promotion or operation expansion may be -insufficient. In some cases, a special intrinsic function must be implemented. -

- -

-For example, a constant value may require special treatment, or an operation may -require spilling and restoring registers in the stack and working with register -allocators. -

- -

-As seen in SparcISelLowering.cpp code below, to perform a type -conversion from a floating point value to a signed integer, first the -setOperationAction should be called with Custom as the third -parameter: -

- -
-
-setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
-
-
- -

-In the LowerOperation method, for each Custom operation, a -case statement should be added to indicate what function to call. In the -following code, an FP_TO_SINT opcode will call -the LowerFP_TO_SINT method: -

- -
-
-SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
-  switch (Op.getOpcode()) {
-  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
-  ...
-  }
-}
-
-
- -

-Finally, the LowerFP_TO_SINT method is implemented, using an FP -register to convert the floating-point value to an integer. -

- -
-
-static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
-  assert(Op.getValueType() == MVT::i32);
-  Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
-  return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
-}
-
-
- -
- - -

- Legal -

- -
- -

-The Legal LegalizeAction enum value simply indicates that an -operation is natively supported. Legal represents the default -condition, so it is rarely used. In SparcISelLowering.cpp, the action -for CTPOP (an operation to count the bits set in an integer) is -natively supported only for SPARC v9. The following code enables -the Expand conversion technique for non-v9 SPARC implementations. -

- -
-
-setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-...
-if (TM.getSubtarget<SparcSubtarget>().isV9())
-  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
-  case ISD::SETULT: return SPCC::ICC_CS;
-  case ISD::SETULE: return SPCC::ICC_LEU;
-  case ISD::SETUGT: return SPCC::ICC_GU;
-  case ISD::SETUGE: return SPCC::ICC_CC;
-  }
-}
-
-
- -
- -
- - -

- Calling Conventions -

- -
- -

-To support target-specific calling conventions, XXXGenCallingConv.td -uses interfaces (such as CCIfType and CCAssignToReg) that are defined in -lib/Target/TargetCallingConv.td. TableGen can take the target -descriptor file XXXGenCallingConv.td and generate the header -file XXXGenCallingConv.inc, which is typically included -in XXXISelLowering.cpp. You can use the interfaces in -TargetCallingConv.td to specify: -

- -
    -
  • The order of parameter allocation.
  • - -
  • Where parameters and return values are placed (that is, on the stack or in - registers).
  • - -
  • Which registers may be used.
  • - -
  • Whether the caller or callee unwinds the stack.
  • -
- -

-The following example demonstrates the use of the CCIfType and -CCAssignToReg interfaces. If the CCIfType predicate is true -(that is, if the current argument is of type f32 or f64), then -the action is performed. In this case, the CCAssignToReg action assigns -the argument value to the first available register: either R0 -or R1. -

- -
-
-CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>
-
-
- -

-SparcCallingConv.td contains definitions for a target-specific -return-value calling convention (RetCC_Sparc32) and a basic 32-bit C calling -convention (CC_Sparc32). The definition of RetCC_Sparc32 -(shown below) indicates which registers are used for specified scalar return -types. A single-precision float is returned to register F0, and a -double-precision float goes to register D0. A 32-bit integer is -returned in register I0 or I1. -

- -
-
-def RetCC_Sparc32 : CallingConv<[
-  CCIfType<[i32], CCAssignToReg<[I0, I1]>>,
-  CCIfType<[f32], CCAssignToReg<[F0]>>,
-  CCIfType<[f64], CCAssignToReg<[D0]>>
-]>;
-
-
- -

-The definition of CC_Sparc32 in SparcCallingConv.td introduces -CCAssignToStack, which assigns the value to a stack slot with the -specified size and alignment. In the example below, the first parameter, 4, -indicates the size of the slot, and the second parameter, also 4, indicates the -stack alignment along 4-byte units. (Special cases: if size is zero, then the -ABI size is used; if alignment is zero, then the ABI alignment is used.) -

- -
-
-def CC_Sparc32 : CallingConv<[
-  // All arguments get passed in integer registers if there is space.
-  CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  CCAssignToStack<4, 4>
-]>;
-
-
- -

-CCDelegateTo is another commonly used interface, which tries to find a -specified sub-calling convention, and, if a match is found, it is invoked. In -the following example (in X86CallingConv.td), the definition of -RetCC_X86_32_C ends with CCDelegateTo. After the current value -is assigned to the register ST0 or ST1, -the RetCC_X86Common is invoked. -

- -
-
-def RetCC_X86_32_C : CallingConv<[
-  CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>,
-  CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>,
-  CCDelegateTo<RetCC_X86Common>
-]>;
-
-
- -

-CCIfCC is an interface that attempts to match the given name to the -current calling convention. If the name identifies the current calling -convention, then a specified action is invoked. In the following example (in -X86CallingConv.td), if the Fast calling convention is in use, -then RetCC_X86_32_Fast is invoked. If the SSECall calling -convention is in use, then RetCC_X86_32_SSE is invoked. -

- -
-
-def RetCC_X86_32 : CallingConv<[
-  CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
-  CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo<RetCC_X86_32_SSE>>,
-  CCDelegateTo<RetCC_X86_32_C>
-]>;
-
-
- -

Other calling convention interfaces include:

- -
    -
  • CCIf <predicate, action> — If the predicate matches, - apply the action.
  • - -
  • CCIfInReg <action> — If the argument is marked with the - 'inreg' attribute, then apply the action.
  • - -
  • CCIfNest <action> — If the argument is marked with the - 'nest' attribute, then apply the action.
  • - -
  • CCIfNotVarArg <action> — If the current function does - not take a variable number of arguments, apply the action.
  • - -
  • CCAssignToRegWithShadow <registerList, shadowList> — - similar to CCAssignToReg, but with a shadow list of registers.
  • - -
  • CCPassByVal <size, align> — Assign value to a stack - slot with the minimum specified size and alignment.
  • - -
  • CCPromoteToType <type> — Promote the current value to - the specified type.
  • - -
  • CallingConv <[actions]> — Define each calling - convention that is supported.
  • -
- -
- -
- - -

- Assembly Printer -

- - -
- -

-During the code emission stage, the code generator may utilize an LLVM pass to -produce assembly output. To do this, you want to implement the code for a -printer that converts LLVM IR to a GAS-format assembly language for your target -machine, using the following steps: -

- -
    -
  • Define all the assembly strings for your target, adding them to the - instructions defined in the XXXInstrInfo.td file. - (See Instruction Set.) TableGen will produce - an output file (XXXGenAsmWriter.inc) with an implementation of - the printInstruction method for the XXXAsmPrinter class.
  • - -
  • Write XXXTargetAsmInfo.h, which contains the bare-bones declaration - of the XXXTargetAsmInfo class (a subclass - of TargetAsmInfo).
  • - -
  • Write XXXTargetAsmInfo.cpp, which contains target-specific values - for TargetAsmInfo properties and sometimes new implementations for - methods.
  • - -
  • Write XXXAsmPrinter.cpp, which implements the AsmPrinter - class that performs the LLVM-to-assembly conversion.
  • -
- -

-The code in XXXTargetAsmInfo.h is usually a trivial declaration of the -XXXTargetAsmInfo class for use in XXXTargetAsmInfo.cpp. -Similarly, XXXTargetAsmInfo.cpp usually has a few declarations of -XXXTargetAsmInfo replacement values that override the default values -in TargetAsmInfo.cpp. For example in SparcTargetAsmInfo.cpp: -

- -
-
-SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) {
-  Data16bitsDirective = "\t.half\t";
-  Data32bitsDirective = "\t.word\t";
-  Data64bitsDirective = 0;  // .xword is only supported by V9.
-  ZeroDirective = "\t.skip\t";
-  CommentString = "!";
-  ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
-}
-
-
- -

-The X86 assembly printer implementation (X86TargetAsmInfo) is an -example where the target-specific TargetAsmInfo class uses an -overridden method: ExpandInlineAsm. -

- -

-A target-specific implementation of AsmPrinter is written in -XXXAsmPrinter.cpp, which implements the AsmPrinter class that -converts the LLVM to printable assembly. The implementation must include the -following headers that have declarations for the AsmPrinter and -MachineFunctionPass classes. The MachineFunctionPass is a -subclass of FunctionPass. -

- -
-
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h" 
-
-
- -

-As a FunctionPass, AsmPrinter first -calls doInitialization to set up the AsmPrinter. In -SparcAsmPrinter, a Mangler object is instantiated to process -variable names. -

- -

-In XXXAsmPrinter.cpp, the runOnMachineFunction method -(declared in MachineFunctionPass) must be implemented -for XXXAsmPrinter. In MachineFunctionPass, -the runOnFunction method invokes runOnMachineFunction. -Target-specific implementations of runOnMachineFunction differ, but -generally do the following to process each machine function: -

- -
    -
  • Call SetupMachineFunction to perform initialization.
  • - -
  • Call EmitConstantPool to print out (to the output stream) constants - which have been spilled to memory.
  • - -
  • Call EmitJumpTableInfo to print out jump tables used by the current - function.
  • - -
  • Print out the label for the current function.
  • - -
  • Print out the code for the function, including basic block labels and the - assembly for the instruction (using printInstruction)
  • -
- -

-The XXXAsmPrinter implementation must also include the code generated -by TableGen that is output in the XXXGenAsmWriter.inc file. The code -in XXXGenAsmWriter.inc contains an implementation of the -printInstruction method that may call these methods: -

- -
    -
  • printOperand
  • - -
  • printMemOperand
  • - -
  • printCCOperand (for conditional statements)
  • - -
  • printDataDirective
  • - -
  • printDeclare
  • - -
  • printImplicitDef
  • - -
  • printInlineAsm
  • -
- -

-The implementations of printDeclare, printImplicitDef, -printInlineAsm, and printLabel in AsmPrinter.cpp are -generally adequate for printing assembly and do not need to be -overridden. -

- -

-The printOperand method is implemented with a long switch/case -statement for the type of operand: register, immediate, basic block, external -symbol, global address, constant pool index, or jump table index. For an -instruction with a memory address operand, the printMemOperand method -should be implemented to generate the proper output. Similarly, -printCCOperand should be used to print a conditional operand. -

- -

doFinalization should be overridden in XXXAsmPrinter, and -it should be called to shut down the assembly printer. During -doFinalization, global variables and constants are printed to -output. -

- -
- - -

- Subtarget Support -

- - -
- -

-Subtarget support is used to inform the code generation process of instruction -set variations for a given chip set. For example, the LLVM SPARC implementation -provided covers three major versions of the SPARC microprocessor architecture: -Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a 64-bit -architecture), and the UltraSPARC architecture. V8 has 16 double-precision -floating-point registers that are also usable as either 32 single-precision or 8 -quad-precision registers. V8 is also purely big-endian. V9 has 32 -double-precision floating-point registers that are also usable as 16 -quad-precision registers, but cannot be used as single-precision registers. The -UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set -extensions. -

- -

-If subtarget support is needed, you should implement a target-specific -XXXSubtarget class for your architecture. This class should process the -command-line options -mcpu= and -mattr=. -

- -

-TableGen uses definitions in the Target.td and Sparc.td files -to generate code in SparcGenSubtarget.inc. In Target.td, shown -below, the SubtargetFeature interface is defined. The first 4 string -parameters of the SubtargetFeature interface are a feature name, an -attribute set by the feature, the value of the attribute, and a description of -the feature. (The fifth parameter is a list of features whose presence is -implied, and its default value is an empty array.) -

- -
-
-class SubtargetFeature<string n, string a,  string v, string d,
-                       list<SubtargetFeature> i = []> {
-  string Name = n;
-  string Attribute = a;
-  string Value = v;
-  string Desc = d;
-  list<SubtargetFeature> Implies = i;
-}
-
-
- -

-In the Sparc.td file, the SubtargetFeature is used to define the -following features. -

- -
-
-def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true",
-                     "Enable SPARC-V9 instructions">;
-def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8", 
-                     "V8DeprecatedInsts", "true",
-                     "Enable deprecated V8 instructions in V9 mode">;
-def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true",
-                     "Enable UltraSPARC Visual Instruction Set extensions">;
-
-
- -

-Elsewhere in Sparc.td, the Proc class is defined and then is used to -define particular SPARC processor subtypes that may have the previously -described features. -

- -
-
-class Proc<string Name, list<SubtargetFeature> Features>
-  : Processor<Name, NoItineraries, Features>;
- 
-def : Proc<"generic",         []>;
-def : Proc<"v8",              []>;
-def : Proc<"supersparc",      []>;
-def : Proc<"sparclite",       []>;
-def : Proc<"f934",            []>;
-def : Proc<"hypersparc",      []>;
-def : Proc<"sparclite86x",    []>;
-def : Proc<"sparclet",        []>;
-def : Proc<"tsc701",          []>;
-def : Proc<"v9",              [FeatureV9]>;
-def : Proc<"ultrasparc",      [FeatureV9, FeatureV8Deprecated]>;
-def : Proc<"ultrasparc3",     [FeatureV9, FeatureV8Deprecated]>;
-def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
-
-
- -

-From Target.td and Sparc.td files, the resulting -SparcGenSubtarget.inc specifies enum values to identify the features, arrays of -constants to represent the CPU features and CPU subtypes, and the -ParseSubtargetFeatures method that parses the features string that sets -specified subtarget options. The generated SparcGenSubtarget.inc file -should be included in the SparcSubtarget.cpp. The target-specific -implementation of the XXXSubtarget method should follow this pseudocode: -

- -
-
-XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) {
-  // Set the default features
-  // Determine default and user specified characteristics of the CPU
-  // Call ParseSubtargetFeatures(FS, CPU) to parse the features string
-  // Perform any additional operations
-}
-
-
- -
- - -

- JIT Support -

- - -
- -

-The implementation of a target machine optionally includes a Just-In-Time (JIT) -code generator that emits machine code and auxiliary structures as binary output -that can be written directly to memory. To do this, implement JIT code -generation by performing the following steps: -

- -
    -
  • Write an XXXCodeEmitter.cpp file that contains a machine function - pass that transforms target-machine instructions into relocatable machine - code.
  • - -
  • Write an XXXJITInfo.cpp file that implements the JIT interfaces for - target-specific code-generation activities, such as emitting machine code - and stubs.
  • - -
  • Modify XXXTargetMachine so that it provides a - TargetJITInfo object through its getJITInfo method.
  • -
- -

-There are several different approaches to writing the JIT support code. For -instance, TableGen and target descriptor files may be used for creating a JIT -code generator, but are not mandatory. For the Alpha and PowerPC target -machines, TableGen is used to generate XXXGenCodeEmitter.inc, which -contains the binary coding of machine instructions and the -getBinaryCodeForInstr method to access those codes. Other JIT -implementations do not. -

- -

-Both XXXJITInfo.cpp and XXXCodeEmitter.cpp must include the -llvm/CodeGen/MachineCodeEmitter.h header file that defines the -MachineCodeEmitter class containing code for several callback functions -that write data (in bytes, words, strings, etc.) to the output stream. -

- - -

- Machine Code Emitter -

- -
- -

-In XXXCodeEmitter.cpp, a target-specific version of the Emitter class -is implemented as a function pass (subclass -of MachineFunctionPass). The target-specific implementation -of runOnMachineFunction (invoked by -runOnFunction in MachineFunctionPass) iterates through the -MachineBasicBlock and calls emitInstruction to process each -instruction and emit binary code. emitInstruction is largely -implemented with case statements on the instruction types defined in -XXXInstrInfo.h. For example, in X86CodeEmitter.cpp, -the emitInstruction method is built around the following switch/case -statements: -

- -
-
-switch (Desc->TSFlags & X86::FormMask) {
-case X86II::Pseudo:  // for not yet implemented instructions 
-   ...               // or pseudo-instructions
-   break;
-case X86II::RawFrm:  // for instructions with a fixed opcode value
-   ...
-   break;
-case X86II::AddRegFrm: // for instructions that have one register operand 
-   ...                 // added to their opcode
-   break;
-case X86II::MRMDestReg:// for instructions that use the Mod/RM byte
-   ...                 // to specify a destination (register)
-   break;
-case X86II::MRMDestMem:// for instructions that use the Mod/RM byte
-   ...                 // to specify a destination (memory)
-   break;
-case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte
-   ...                 // to specify a source (register)
-   break;
-case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte
-   ...                 // to specify a source (memory)
-   break;
-case X86II::MRM0r: case X86II::MRM1r:  // for instructions that operate on 
-case X86II::MRM2r: case X86II::MRM3r:  // a REGISTER r/m operand and
-case X86II::MRM4r: case X86II::MRM5r:  // use the Mod/RM byte and a field
-case X86II::MRM6r: case X86II::MRM7r:  // to hold extended opcode data
-   ...  
-   break;
-case X86II::MRM0m: case X86II::MRM1m:  // for instructions that operate on
-case X86II::MRM2m: case X86II::MRM3m:  // a MEMORY r/m operand and
-case X86II::MRM4m: case X86II::MRM5m:  // use the Mod/RM byte and a field
-case X86II::MRM6m: case X86II::MRM7m:  // to hold extended opcode data
-   ...  
-   break;
-case X86II::MRMInitReg: // for instructions whose source and
-   ...                  // destination are the same register
-   break;
-}
-
-
- -

-The implementations of these case statements often first emit the opcode and -then get the operand(s). Then depending upon the operand, helper methods may be -called to process the operand(s). For example, in X86CodeEmitter.cpp, -for the X86II::AddRegFrm case, the first data emitted -(by emitByte) is the opcode added to the register operand. Then an -object representing the machine operand, MO1, is extracted. The helper -methods such as isImmediate, -isGlobalAddress, isExternalSymbol, isConstantPoolIndex, and -isJumpTableIndex determine the operand -type. (X86CodeEmitter.cpp also has private methods such -as emitConstant, emitGlobalAddress, -emitExternalSymbolAddress, emitConstPoolAddress, -and emitJumpTableAddress that emit the data into the output stream.) -

- -
-
-case X86II::AddRegFrm:
-  MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
-  
-  if (CurOp != NumOps) {
-    const MachineOperand &MO1 = MI.getOperand(CurOp++);
-    unsigned Size = X86InstrInfo::sizeOfImm(Desc);
-    if (MO1.isImmediate())
-      emitConstant(MO1.getImm(), Size);
-    else {
-      unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
-        : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
-      if (Opcode == X86::MOV64ri) 
-        rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
-      if (MO1.isGlobalAddress()) {
-        bool NeedStub = isa<Function>(MO1.getGlobal());
-        bool isLazy = gvNeedsLazyPtr(MO1.getGlobal());
-        emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
-                          NeedStub, isLazy);
-      } else if (MO1.isExternalSymbol())
-        emitExternalSymbolAddress(MO1.getSymbolName(), rt);
-      else if (MO1.isConstantPoolIndex())
-        emitConstPoolAddress(MO1.getIndex(), rt);
-      else if (MO1.isJumpTableIndex())
-        emitJumpTableAddress(MO1.getIndex(), rt);
-    }
-  }
-  break;
-
-
- -

-In the previous example, XXXCodeEmitter.cpp uses the -variable rt, which is a RelocationType enum that may be used to -relocate addresses (for example, a global address with a PIC base offset). The -RelocationType enum for that target is defined in the short -target-specific XXXRelocations.h file. The RelocationType is used by -the relocate method defined in XXXJITInfo.cpp to rewrite -addresses for referenced global symbols. -

- -

-For example, X86Relocations.h specifies the following relocation types -for the X86 addresses. In all four cases, the relocated value is added to the -value already in memory. For reloc_pcrel_word -and reloc_picrel_word, there is an additional initial adjustment. -

- -
-
-enum RelocationType {
-  reloc_pcrel_word = 0,    // add reloc value after adjusting for the PC loc
-  reloc_picrel_word = 1,   // add reloc value after adjusting for the PIC base
-  reloc_absolute_word = 2, // absolute relocation; no additional adjustment 
-  reloc_absolute_dword = 3 // absolute relocation; no additional adjustment
-};
-
-
- -
- - -

- Target JIT Info -

- -
- -

-XXXJITInfo.cpp implements the JIT interfaces for target-specific -code-generation activities, such as emitting machine code and stubs. At minimum, -a target-specific version of XXXJITInfo implements the following: -

- -
    -
  • getLazyResolverFunction — Initializes the JIT, gives the - target a function that is used for compilation.
  • - -
  • emitFunctionStub — Returns a native function with a specified - address for a callback function.
  • - -
  • relocate — Changes the addresses of referenced globals, based - on relocation types.
  • - -
  • Callback functions that are wrappers to a function stub that is used when the - real target is not initially known.
  • -
- -

-getLazyResolverFunction is generally trivial to implement. It stores the -incoming parameter as the global JITCompilerFunction and returns the -callback function that will be used as a function wrapper. For the Alpha target -(in AlphaJITInfo.cpp), the getLazyResolverFunction -implementation is simply: -

- -
-
-TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction(  
-                                            JITCompilerFn F) {
-  JITCompilerFunction = F;
-  return AlphaCompilationCallback;
-}
-
-
- -

-For the X86 target, the getLazyResolverFunction implementation is a -little more complicated, because it returns a different callback function for -processors with SSE instructions and XMM registers. -

- -

-The callback function initially saves and later restores the callee register -values, incoming arguments, and frame and return address. The callback function -needs low-level access to the registers or stack, so it is typically implemented -with assembler. -

- -
- -
- - - -
-
- Valid CSS - Valid HTML 4.01 - - Mason Woo and Misha Brukman
- The LLVM Compiler Infrastructure -
- Last modified: $Date: 2012-10-25 17:54:06 +0200 (Thu, 25 Oct 2012) $ -
- - - diff --git a/docs/WritingAnLLVMBackend.rst b/docs/WritingAnLLVMBackend.rst new file mode 100644 index 000000000000..a03a5e42c22d --- /dev/null +++ b/docs/WritingAnLLVMBackend.rst @@ -0,0 +1,1838 @@ +================================ +Writing an LLVM Compiler Backend +================================ + +.. toctree:: + :hidden: + + HowToUseInstrMappings + +.. contents:: + :local: + +Introduction +============ + +This document describes techniques for writing compiler backends that convert +the LLVM Intermediate Representation (IR) to code for a specified machine or +other languages. Code intended for a specific machine can take the form of +either assembly code or binary code (usable for a JIT compiler). + +The backend of LLVM features a target-independent code generator that may +create output for several types of target CPUs --- including X86, PowerPC, +ARM, and SPARC. The backend may also be used to generate code targeted at SPUs +of the Cell processor or GPUs to support the execution of compute kernels. + +The document focuses on existing examples found in subdirectories of +``llvm/lib/Target`` in a downloaded LLVM release. In particular, this document +focuses on the example of creating a static compiler (one that emits text +assembly) for a SPARC target, because SPARC has fairly standard +characteristics, such as a RISC instruction set and straightforward calling +conventions. + +Audience +-------- + +The audience for this document is anyone who needs to write an LLVM backend to +generate code for a specific hardware or software target. + +Prerequisite Reading +-------------------- + +These essential documents must be read before reading this document: + +* `LLVM Language Reference Manual `_ --- a reference manual for + the LLVM assembly language. + +* :doc:`CodeGenerator` --- a guide to the components (classes and code + generation algorithms) for translating the LLVM internal representation into + machine code for a specified target. 
Pay particular attention to the + descriptions of code generation stages: Instruction Selection, Scheduling and + Formation, SSA-based Optimization, Register Allocation, Prolog/Epilog Code + Insertion, Late Machine Code Optimizations, and Code Emission. + +* :doc:`TableGenFundamentals` --- a document that describes the TableGen + (``tblgen``) application that manages domain-specific information to support + LLVM code generation. TableGen processes input from a target description + file (``.td`` suffix) and generates C++ code that can be used for code + generation. + +* :doc:`WritingAnLLVMPass` --- The assembly printer is a ``FunctionPass``, as + are several ``SelectionDAG`` processing steps. + +To follow the SPARC examples in this document, have a copy of `The SPARC +Architecture Manual, Version 8 `_ for +reference. For details about the ARM instruction set, refer to the `ARM +Architecture Reference Manual `_. For more about +the GNU Assembler format (``GAS``), see `Using As +`_, especially for the +assembly printer. "Using As" contains a list of target machine dependent +features. + +Basic Steps +----------- + +To write a compiler backend for LLVM that converts the LLVM IR to code for a +specified target (machine or other language), follow these steps: + +* Create a subclass of the ``TargetMachine`` class that describes + characteristics of your target machine. Copy existing examples of specific + ``TargetMachine`` class and header files; for example, start with + ``SparcTargetMachine.cpp`` and ``SparcTargetMachine.h``, but change the file + names for your target. Similarly, change code that references "``Sparc``" to + reference your target. + +* Describe the register set of the target. Use TableGen to generate code for + register definition, register aliases, and register classes from a + target-specific ``RegisterInfo.td`` input file. 
You should also write + additional code for a subclass of the ``TargetRegisterInfo`` class that + represents the class register file data used for register allocation and also + describes the interactions between registers. + +* Describe the instruction set of the target. Use TableGen to generate code + for target-specific instructions from target-specific versions of + ``TargetInstrFormats.td`` and ``TargetInstrInfo.td``. You should write + additional code for a subclass of the ``TargetInstrInfo`` class to represent + machine instructions supported by the target machine. + +* Describe the selection and conversion of the LLVM IR from a Directed Acyclic + Graph (DAG) representation of instructions to native target-specific + instructions. Use TableGen to generate code that matches patterns and + selects instructions based on additional information in a target-specific + version of ``TargetInstrInfo.td``. Write code for ``XXXISelDAGToDAG.cpp``, + where ``XXX`` identifies the specific target, to perform pattern matching and + DAG-to-DAG instruction selection. Also write code in ``XXXISelLowering.cpp`` + to replace or remove operations and data types that are not supported + natively in a SelectionDAG. + +* Write code for an assembly printer that converts LLVM IR to a GAS format for + your target machine. You should add assembly strings to the instructions + defined in your target-specific version of ``TargetInstrInfo.td``. You + should also write code for a subclass of ``AsmPrinter`` that performs the + LLVM-to-assembly conversion and a trivial subclass of ``TargetAsmInfo``. + +* Optionally, add support for subtargets (i.e., variants with different + capabilities). You should also write code for a subclass of the + ``TargetSubtarget`` class, which allows you to use the ``-mcpu=`` and + ``-mattr=`` command-line options. 
+ +* Optionally, add JIT support and create a machine code emitter (subclass of + ``TargetJITInfo``) that is used to emit binary code directly into memory. + +In the ``.cpp`` and ``.h`` files, initially stub up these methods and then +implement them later. Initially, you may not know which private members +the class will need and which components will need to be subclassed. + +Preliminaries +------------- + +To actually create your compiler backend, you need to create and modify a few +files. The absolute minimum is discussed here. But to actually use the LLVM +target-independent code generator, you must perform the steps described in the +:doc:`LLVM Target-Independent Code Generator <CodeGenerator>` document. + +First, you should create a subdirectory under ``lib/Target`` to hold all the +files related to your target. If your target is called "Dummy", create the +directory ``lib/Target/Dummy``. + +In this new directory, create a ``Makefile``. It is easiest to copy a +``Makefile`` of another target and modify it. It should at least contain the +``LEVEL``, ``LIBRARYNAME`` and ``TARGET`` variables, and then include +``$(LEVEL)/Makefile.common``. The library can be named ``LLVMDummy`` (for +example, see the MIPS target). Alternatively, you can split the library into +``LLVMDummyCodeGen`` and ``LLVMDummyAsmPrinter``, the latter of which should be +implemented in a subdirectory below ``lib/Target/Dummy`` (for example, see the +PowerPC target). + +Note that these two naming schemes are hardcoded into ``llvm-config``. Using +any other naming scheme will confuse ``llvm-config`` and produce a lot of +(seemingly unrelated) linker errors when linking ``llc``. + +To make your target actually do something, you need to implement a subclass of +``TargetMachine``. This implementation should typically be in the file +``lib/Target/DummyTargetMachine.cpp``, but any file in the ``lib/Target`` +directory will be built and should work. 
To use LLVM's target-independent code +generator, you should do what all current machine backends do: create a +subclass of ``LLVMTargetMachine``. (To create a target from scratch, create a +subclass of ``TargetMachine``.) + +To get LLVM to actually build and link your target, you need to add it to the +``TARGETS_TO_BUILD`` variable. To do this, you modify the configure script to +know about your target when parsing the ``--enable-targets`` option. Search +the configure script for ``TARGETS_TO_BUILD``, add your target to the lists +there (some creativity required), and then reconfigure. Alternatively, you can +change ``autoconf/configure.ac`` and regenerate configure by running +``./autoconf/AutoRegen.sh``. + +Target Machine +============== + +``LLVMTargetMachine`` is designed as a base class for targets implemented with +the LLVM target-independent code generator. The ``LLVMTargetMachine`` class +should be specialized by a concrete target class that implements the various +virtual methods. ``LLVMTargetMachine`` is defined as a subclass of +``TargetMachine`` in ``include/llvm/Target/TargetMachine.h``. The +``TargetMachine`` class implementation (``TargetMachine.cpp``) also processes +numerous command-line options. + +To create a concrete target-specific subclass of ``LLVMTargetMachine``, start +by copying an existing ``TargetMachine`` class and header. You should name the +files that you create to reflect your specific target. For instance, for the +SPARC target, name the files ``SparcTargetMachine.h`` and +``SparcTargetMachine.cpp``. + +For a target machine ``XXX``, the implementation of ``XXXTargetMachine`` must +have access methods to obtain objects that represent target components. These +methods are named ``get*Info``, and are intended to obtain the instruction set +(``getInstrInfo``), register set (``getRegisterInfo``), stack frame layout +(``getFrameInfo``), and similar information.
``XXXTargetMachine`` must also +implement the ``getDataLayout`` method to access an object with target-specific +data characteristics, such as data type size and alignment requirements. + +For instance, for the SPARC target, the header file ``SparcTargetMachine.h`` +declares prototypes for several ``get*Info`` and ``getDataLayout`` methods that +simply return a class member. + +.. code-block:: c++ + + namespace llvm { + + class Module; + + class SparcTargetMachine : public LLVMTargetMachine { + const DataLayout DataLayout; // Calculates type size & alignment + SparcSubtarget Subtarget; + SparcInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + + protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + + public: + SparcTargetMachine(const Module &M, const std::string &FS); + + virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; } + virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; } + virtual const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual const DataLayout *getDataLayout() const { return &DataLayout; } + static unsigned getModuleMatchQuality(const Module &M); + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, bool Fast); + virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast); + }; + + } // end namespace llvm + +* ``getInstrInfo()`` +* ``getRegisterInfo()`` +* ``getFrameInfo()`` +* ``getDataLayout()`` +* ``getSubtargetImpl()`` + +For some targets, you also need to support the following methods: + +* ``getTargetLowering()`` +* ``getJITInfo()`` + +In addition, the ``XXXTargetMachine`` constructor should specify a +``TargetDescription`` string that determines the data layout for the target +machine, including characteristics such as pointer size, alignment, and +endianness. 
For example, the constructor for ``SparcTargetMachine`` contains +the following: + +.. code-block:: c++ + + SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS) + : DataLayout("E-p:32:32-f128:128:128"), + Subtarget(M, FS), InstrInfo(Subtarget), + FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { + } + +Hyphens separate portions of the ``TargetDescription`` string. + +* An upper-case "``E``" in the string indicates a big-endian target data model. + A lower-case "``e``" indicates little-endian. + +* "``p:``" is followed by pointer information: size, ABI alignment, and + preferred alignment. If only two figures follow "``p:``", then the first + value is pointer size, and the second value is both ABI and preferred + alignment. + +* Then a letter for numeric type alignment: "``i``", "``f``", "``v``", or + "``a``" (corresponding to integer, floating point, vector, or aggregate). + "``i``", "``v``", or "``a``" are followed by ABI alignment and preferred + alignment. "``f``" is followed by three values: the first indicates the size + of a long double, then ABI alignment, and then ABI preferred alignment. + +Target Registration +=================== + +You must also register your target with the ``TargetRegistry``, which is what +other LLVM tools use to be able to lookup and use your target at runtime. The +``TargetRegistry`` can be used directly, but for most targets there are helper +templates which should take care of the work for you. + +All targets should declare a global ``Target`` object which is used to +represent the target during registration. Then, in the target's ``TargetInfo`` +library, the target should define that object and use the ``RegisterTarget`` +template to register the target. For example, the Sparc registration code +looks like this: + +.. 
code-block:: c++ + + Target llvm::TheSparcTarget; + + extern "C" void LLVMInitializeSparcTargetInfo() { + RegisterTarget<Triple::sparc, /*HasJIT=*/false> + X(TheSparcTarget, "sparc", "Sparc"); + } + +This allows the ``TargetRegistry`` to look up the target by name or by target +triple. In addition, most targets will also register additional features which +are available in separate libraries. These registration steps are separate, +because some clients may wish to only link in some parts of the target --- the +JIT code generator does not require the use of the assembly printer, for +example. Here is an example of registering the Sparc assembly printer: + +.. code-block:: c++ + + extern "C" void LLVMInitializeSparcAsmPrinter() { + RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); + } + +For more information, see "`llvm/Target/TargetRegistry.h +</doxygen/TargetRegistry_8h-source.html>`_". + +Register Set and Register Classes +================================= + +You should describe a concrete target-specific class that represents the +register file of a target machine. This class is called ``XXXRegisterInfo`` +(where ``XXX`` identifies the target) and represents the class register file +data that is used for register allocation. It also describes the interactions +between registers. + +You also need to define register classes to categorize related registers. A +register class should be added for groups of registers that are all treated the +same way for some instruction. Typical examples are register classes for +integer, floating-point, or vector registers. A register allocator allows an +instruction to use any register in a specified register class to perform the +instruction in a similar manner. Register classes allocate virtual registers +to instructions from these sets, and register classes let the +target-independent register allocator automatically choose the actual +registers.
+ +Much of the code for registers, including register definition, register +aliases, and register classes, is generated by TableGen from +``XXXRegisterInfo.td`` input files and placed in ``XXXGenRegisterInfo.h.inc`` +and ``XXXGenRegisterInfo.inc`` output files. Some of the code in the +implementation of ``XXXRegisterInfo`` requires hand-coding. + +Defining a Register +------------------- + +The ``XXXRegisterInfo.td`` file typically starts with register definitions for +a target machine. The ``Register`` class (specified in ``Target.td``) is used +to define an object for each register. The specified string ``n`` becomes the +``Name`` of the register. The basic ``Register`` object does not have any +subregisters and does not specify any aliases. + +.. code-block:: llvm + + class Register { + string Namespace = ""; + string AsmName = n; + string Name = n; + int SpillSize = 0; + int SpillAlignment = 0; + list Aliases = []; + list SubRegs = []; + list DwarfNumbers = []; + } + +For example, in the ``X86RegisterInfo.td`` file, there are register definitions +that utilize the ``Register`` class, such as: + +.. code-block:: llvm + + def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>; + +This defines the register ``AL`` and assigns it values (with ``DwarfRegNum``) +that are used by ``gcc``, ``gdb``, or a debug information writer to identify a +register. For register ``AL``, ``DwarfRegNum`` takes an array of 3 values +representing 3 different modes: the first element is for X86-64, the second for +exception handling (EH) on X86-32, and the third is generic. -1 is a special +Dwarf number that indicates the gcc number is undefined, and -2 indicates the +register number is invalid for this mode. + +From the previously described line in the ``X86RegisterInfo.td`` file, TableGen +generates this code in the ``X86GenRegisterInfo.inc`` file: + +.. code-block:: c++ + + static const unsigned GR8[] = { X86::AL, ... 
}; + + const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 }; + + const TargetRegisterDesc RegisterDescriptors[] = { + ... + { "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ... + +From the register info file, TableGen generates a ``TargetRegisterDesc`` object +for each register. ``TargetRegisterDesc`` is defined in +``include/llvm/Target/TargetRegisterInfo.h`` with the following fields: + +.. code-block:: c++ + + struct TargetRegisterDesc { + const char *AsmName; // Assembly language name for the register + const char *Name; // Printable name for the reg (for debugging) + const unsigned *AliasSet; // Register Alias Set + const unsigned *SubRegs; // Sub-register set + const unsigned *ImmSubRegs; // Immediate sub-register set + const unsigned *SuperRegs; // Super-register set + }; + +TableGen uses the entire target description file (``.td``) to determine text +names for the register (in the ``AsmName`` and ``Name`` fields of +``TargetRegisterDesc``) and the relationships of other registers to the defined +register (in the other ``TargetRegisterDesc`` fields). In this example, other +definitions establish the registers "``AX``", "``EAX``", and "``RAX``" as +aliases for one another, so TableGen generates a null-terminated array +(``AL_AliasSet``) for this register alias set. + +The ``Register`` class is commonly used as a base class for more complex +classes. In ``Target.td``, the ``Register`` class is the base for the +``RegisterWithSubRegs`` class that is used to define registers that need to +specify subregisters in the ``SubRegs`` list, as shown here: + +.. code-block:: llvm + + class RegisterWithSubRegs subregs> : Register { + let SubRegs = subregs; + } + +In ``SparcRegisterInfo.td``, additional register classes are defined for SPARC: +a ``Register`` subclass, ``SparcReg``, and further subclasses: ``Ri``, ``Rf``, +and ``Rd``. 
SPARC registers are identified by 5-bit ID numbers, which is a +feature common to these subclasses. Note the use of "``let``" expressions to +override values that are initially defined in a superclass (such as ``SubRegs`` +field in the ``Rd`` class). + +.. code-block:: llvm + + class SparcReg : Register { + field bits<5> Num; + let Namespace = "SP"; + } + // Ri - 32-bit integer registers + class Ri num, string n> : + SparcReg { + let Num = num; + } + // Rf - 32-bit floating-point registers + class Rf num, string n> : + SparcReg { + let Num = num; + } + // Rd - Slots in the FP register file for 64-bit floating-point values. + class Rd num, string n, list subregs> : SparcReg { + let Num = num; + let SubRegs = subregs; + } + +In the ``SparcRegisterInfo.td`` file, there are register definitions that +utilize these subclasses of ``Register``, such as: + +.. code-block:: llvm + + def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; + def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; + ... + def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>; + def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>; + ... + def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>; + +The last two registers shown above (``D0`` and ``D1``) are double-precision +floating-point registers that are aliases for pairs of single-precision +floating-point sub-registers. In addition to aliases, the sub-register and +super-register relationships of the defined register are in fields of a +register's ``TargetRegisterDesc``. + +Defining a Register Class +------------------------- + +The ``RegisterClass`` class (specified in ``Target.td``) is used to define an +object that represents a group of related registers and also defines the +default allocation order of the registers. A target description file +``XXXRegisterInfo.td`` that uses ``Target.td`` can construct register classes +using the following class: + +.. 
code-block:: llvm + + class RegisterClass regTypes, int alignment, dag regList> { + string Namespace = namespace; + list RegTypes = regTypes; + int Size = 0; // spill size, in bits; zero lets tblgen pick the size + int Alignment = alignment; + + // CopyCost is the cost of copying a value between two registers + // default value 1 means a single instruction + // A negative value means copying is extremely expensive or impossible + int CopyCost = 1; + dag MemberList = regList; + + // for register classes that are subregisters of this class + list SubRegClassList = []; + + code MethodProtos = [{}]; // to insert arbitrary code + code MethodBodies = [{}]; + } + +To define a ``RegisterClass``, use the following 4 arguments: + +* The first argument of the definition is the name of the namespace. + +* The second argument is a list of ``ValueType`` register type values that are + defined in ``include/llvm/CodeGen/ValueTypes.td``. Defined values include + integer types (such as ``i16``, ``i32``, and ``i1`` for Boolean), + floating-point types (``f32``, ``f64``), and vector types (for example, + ``v8i16`` for an ``8 x i16`` vector). All registers in a ``RegisterClass`` + must have the same ``ValueType``, but some registers may store vector data in + different configurations. For example a register that can process a 128-bit + vector may be able to handle 16 8-bit integer elements, 8 16-bit integers, 4 + 32-bit integers, and so on. + +* The third argument of the ``RegisterClass`` definition specifies the + alignment required of the registers when they are stored or loaded to + memory. + +* The final argument, ``regList``, specifies which registers are in this class. + If an alternative allocation order method is not specified, then ``regList`` + also defines the order of allocation used by the register allocator. Besides + simply listing registers with ``(add R0, R1, ...)``, more advanced set + operators are available. 
See ``include/llvm/Target/Target.td`` for more + information. + +In ``SparcRegisterInfo.td``, three ``RegisterClass`` objects are defined: +``FPRegs``, ``DFPRegs``, and ``IntRegs``. For all three register classes, the +first argument defines the namespace with the string "``SP``". ``FPRegs`` +defines a group of 32 single-precision floating-point registers (``F0`` to +``F31``); ``DFPRegs`` defines a group of 16 double-precision registers +(``D0-D15``). + +.. code-block:: llvm + + // F0, F1, F2, ..., F31 + def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>; + + def DFPRegs : RegisterClass<"SP", [f64], 64, + (add D0, D1, D2, D3, D4, D5, D6, D7, D8, + D9, D10, D11, D12, D13, D14, D15)>; + + def IntRegs : RegisterClass<"SP", [i32], 32, + (add L0, L1, L2, L3, L4, L5, L6, L7, + I0, I1, I2, I3, I4, I5, + O0, O1, O2, O3, O4, O5, O7, + G1, + // Non-allocatable regs: + G2, G3, G4, + O6, // stack ptr + I6, // frame ptr + I7, // return address + G0, // constant zero + G5, G6, G7 // reserved for kernel + )>; + +Using ``SparcRegisterInfo.td`` with TableGen generates several output files +that are intended for inclusion in other source code that you write. +``SparcRegisterInfo.td`` generates ``SparcGenRegisterInfo.h.inc``, which should +be included in the header file for the implementation of the SPARC register +implementation that you write (``SparcRegisterInfo.h``). In +``SparcGenRegisterInfo.h.inc`` a new structure is defined called +``SparcGenRegisterInfo`` that uses ``TargetRegisterInfo`` as its base. It also +specifies types, based upon the defined register classes: ``DFPRegsClass``, +``FPRegsClass``, and ``IntRegsClass``. + +``SparcRegisterInfo.td`` also generates ``SparcGenRegisterInfo.inc``, which is +included at the bottom of ``SparcRegisterInfo.cpp``, the SPARC register +implementation. The code below shows only the generated integer registers and +associated register classes. 
The order of registers in ``IntRegs`` reflects +the order in the definition of ``IntRegs`` in the target description file. + +.. code-block:: c++ + + // IntRegs Register Class... + static const unsigned IntRegs[] = { + SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5, + SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3, + SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3, + SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3, + SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5, + SP::G6, SP::G7, + }; + + // IntRegsVTs Register Class Value Types... + static const MVT::ValueType IntRegsVTs[] = { + MVT::i32, MVT::Other + }; + + namespace SP { // Register class instances + DFPRegsClass DFPRegsRegClass; + FPRegsClass FPRegsRegClass; + IntRegsClass IntRegsRegClass; + ... + // IntRegs Sub-register Classess... + static const TargetRegisterClass* const IntRegsSubRegClasses [] = { + NULL + }; + ... + // IntRegs Super-register Classess... + static const TargetRegisterClass* const IntRegsSuperRegClasses [] = { + NULL + }; + ... + // IntRegs Register Class sub-classes... + static const TargetRegisterClass* const IntRegsSubclasses [] = { + NULL + }; + ... + // IntRegs Register Class super-classes... + static const TargetRegisterClass* const IntRegsSuperclasses [] = { + NULL + }; + + IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID, + IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses, + IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {} + } + +The register allocators will avoid using reserved registers, and callee saved +registers are not used until all the volatile registers have been used. That +is usually good enough, but in some cases it may be necessary to provide custom +allocation orders. 
+ +Implement a subclass of ``TargetRegisterInfo`` +---------------------------------------------- + +The final step is to hand code portions of ``XXXRegisterInfo``, which +implements the interface described in ``TargetRegisterInfo.h`` (see +:ref:`TargetRegisterInfo`). These functions return ``0``, ``NULL``, or +``false``, unless overridden. Here is a list of functions that are overridden +for the SPARC implementation in ``SparcRegisterInfo.cpp``: + +* ``getCalleeSavedRegs`` --- Returns a list of callee-saved registers in the + order of the desired callee-save stack frame offset. + +* ``getReservedRegs`` --- Returns a bitset indexed by physical register + numbers, indicating if a particular register is unavailable. + +* ``hasFP`` --- Return a Boolean indicating if a function should have a + dedicated frame pointer register. + +* ``eliminateCallFramePseudoInstr`` --- If call frame setup or destroy pseudo + instructions are used, this can be called to eliminate them. + +* ``eliminateFrameIndex`` --- Eliminate abstract frame indices from + instructions that may use them. + +* ``emitPrologue`` --- Insert prologue code into the function. + +* ``emitEpilogue`` --- Insert epilogue code into the function. + +.. _instruction-set: + +Instruction Set +=============== + +During the early stages of code generation, the LLVM IR code is converted to a +``SelectionDAG`` with nodes that are instances of the ``SDNode`` class +containing target instructions. An ``SDNode`` has an opcode, operands, type +requirements, and operation properties. For example, is an operation +commutative, does an operation load from memory. The various operation node +types are described in the ``include/llvm/CodeGen/SelectionDAGNodes.h`` file +(values of the ``NodeType`` enum in the ``ISD`` namespace). 
+ +TableGen uses the following target description (``.td``) input files to +generate much of the code for instruction definition: + +* ``Target.td`` --- Where the ``Instruction``, ``Operand``, ``InstrInfo``, and + other fundamental classes are defined. + +* ``TargetSelectionDAG.td`` --- Used by ``SelectionDAG`` instruction selection + generators, contains ``SDTC*`` classes (selection DAG type constraint), + definitions of ``SelectionDAG`` nodes (such as ``imm``, ``cond``, ``bb``, + ``add``, ``fadd``, ``sub``), and pattern support (``Pattern``, ``Pat``, + ``PatFrag``, ``PatLeaf``, ``ComplexPattern``). + +* ``XXXInstrFormats.td`` --- Patterns for definitions of target-specific + instructions. + +* ``XXXInstrInfo.td`` --- Target-specific definitions of instruction templates, + condition codes, and instructions of an instruction set. For architecture + modifications, a different file name may be used. For example, for Pentium + with SSE instructions, this file is ``X86InstrSSE.td``, and for Pentium with + MMX, this file is ``X86InstrMMX.td``. + +There is also a target-specific ``XXX.td`` file, where ``XXX`` is the name of +the target. The ``XXX.td`` file includes the other ``.td`` input files, but +its contents are only directly important for subtargets. + +You should describe a concrete target-specific class ``XXXInstrInfo`` that +represents machine instructions supported by a target machine. +``XXXInstrInfo`` contains an array of ``XXXInstrDescriptor`` objects, each of +which describes one instruction. An instruction descriptor defines: + +* Opcode mnemonic +* Number of operands +* List of implicit register definitions and uses +* Target-independent properties (such as memory access, is commutable) +* Target-specific flags + +The ``Instruction`` class (defined in ``Target.td``) is mostly used as a base for +more complex instruction classes. + +..
code-block:: llvm + + class Instruction { + string Namespace = ""; + dag OutOperandList; // A dag containing the MI def operand list. + dag InOperandList; // A dag containing the MI use operand list. + string AsmString = ""; // The .s format to print the instruction with. + list Pattern; // Set to the DAG pattern for this instruction. + list Uses = []; + list Defs = []; + list Predicates = []; // predicates turned into isel match code + ... remainder not shown for space ... + } + +A ``SelectionDAG`` node (``SDNode``) should contain an object representing a +target-specific instruction that is defined in ``XXXInstrInfo.td``. The +instruction objects should represent instructions from the architecture manual +of the target machine (such as the SPARC Architecture Manual for the SPARC +target). + +A single instruction from the architecture manual is often modeled as multiple +target instructions, depending upon its operands. For example, a manual might +describe an add instruction that takes a register or an immediate operand. An +LLVM target could model this with two instructions named ``ADDri`` and +``ADDrr``. + +You should define a class for each instruction category and define each opcode +as a subclass of the category with appropriate parameters such as the fixed +binary encoding of opcodes and extended opcodes. You should map the register +bits to the bits of the instruction in which they are encoded (for the JIT). +Also you should specify how the instruction should be printed when the +automatic assembly printer is used. + +As is described in the SPARC Architecture Manual, Version 8, there are three +major 32-bit formats for instructions. Format 1 is only for the ``CALL`` +instruction. Format 2 is for branch on condition codes and ``SETHI`` (set high +bits of a register) instructions. Format 3 is for other instructions. + +Each of these formats has corresponding classes in ``SparcInstrFormat.td``. +``InstSP`` is a base class for other instruction classes. 
Additional base +classes are specified for more precise formats: for example in +``SparcInstrFormat.td``, ``F2_1`` is for ``SETHI``, and ``F2_2`` is for +branches. There are three other base classes: ``F3_1`` for register/register +operations, ``F3_2`` for register/immediate operations, and ``F3_3`` for +floating-point operations. ``SparcInstrInfo.td`` also adds the base class +``Pseudo`` for synthetic SPARC instructions. + +``SparcInstrInfo.td`` largely consists of operand and instruction definitions +for the SPARC target. In ``SparcInstrInfo.td``, the following target +description file entry, ``LDrr``, defines the Load Integer instruction for a +Word (the ``LD`` SPARC opcode) from a memory address to a register. The first +parameter, the value 3 (``11``\ :sub:`2`), is the operation value for this +category of operation. The second parameter (``000000``\ :sub:`2`) is the +specific operation value for ``LD``/Load Word. The third parameter is the +output destination, which is a register operand and defined in the ``Register`` +target description file (``IntRegs``). + +.. code-block:: llvm + + def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr), + "ld [$addr], $dst", + [(set i32:$dst, (load ADDRrr:$addr))]>; + +The fourth parameter is the input source, which uses the address operand +``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``: + +.. code-block:: llvm + + def MEMrr : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops IntRegs, IntRegs); + } + +The fifth parameter is a string that is used by the assembly printer and can be +left as an empty string until the assembly printer interface is implemented. +The sixth and final parameter is the pattern used to match the instruction +during the SelectionDAG Select Phase described in :doc:`CodeGenerator`. +This parameter is detailed in the next section, :ref:`instruction-selector`. 
+ +Instruction class definitions are not overloaded for different operand types, +so separate versions of instructions are needed for register, memory, or +immediate value operands. For example, to perform a Load Integer instruction +for a Word from an immediate operand to a register, the following instruction +class is defined: + +.. code-block:: llvm + + def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), + "ld [$addr], $dst", + [(set i32:$dst, (load ADDRri:$addr))]>; + +Writing these definitions for so many similar instructions can involve a lot of +cut and paste. In ``.td`` files, the ``multiclass`` directive enables the +creation of templates to define several instruction classes at once (using the +``defm`` directive). For example in ``SparcInstrInfo.td``, the ``multiclass`` +pattern ``F3_12`` is defined to create 2 instruction classes each time +``F3_12`` is invoked: + +.. code-block:: llvm + + multiclass F3_12 Op3Val, SDNode OpNode> { + def rr : F3_1 <2, Op3Val, + (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat(OpcStr, " $b, $c, $dst"), + [(set i32:$dst, (OpNode i32:$b, i32:$c))]>; + def ri : F3_2 <2, Op3Val, + (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $b, $c, $dst"), + [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>; + } + +So when the ``defm`` directive is used for the ``XOR`` and ``ADD`` +instructions, as seen below, it creates four instruction objects: ``XORrr``, +``XORri``, ``ADDrr``, and ``ADDri``. + +.. code-block:: llvm + + defm XOR : F3_12<"xor", 0b000011, xor>; + defm ADD : F3_12<"add", 0b000000, add>; + +``SparcInstrInfo.td`` also includes definitions for condition codes that are +referenced by branch instructions. The following definitions in +``SparcInstrInfo.td`` indicate the bit location of the SPARC condition code. 
+For example, the 10\ :sup:`th` bit represents the "greater than" condition for +integers, and the 22\ :sup:`nd` bit represents the "greater than" condition for +floats. + +.. code-block:: llvm + + def ICC_NE : ICC_VAL< 9>; // Not Equal + def ICC_E : ICC_VAL< 1>; // Equal + def ICC_G : ICC_VAL<10>; // Greater + ... + def FCC_U : FCC_VAL<23>; // Unordered + def FCC_G : FCC_VAL<22>; // Greater + def FCC_UG : FCC_VAL<21>; // Unordered or Greater + ... + +(Note that ``Sparc.h`` also defines enums that correspond to the same SPARC +condition codes. Care must be taken to ensure the values in ``Sparc.h`` +correspond to the values in ``SparcInstrInfo.td``. I.e., ``SPCC::ICC_NE = 9``, +``SPCC::FCC_U = 23`` and so on.) + +Instruction Operand Mapping +--------------------------- + +The code generator backend maps instruction operands to fields in the +instruction. Operands are assigned to unbound fields in the instruction in the +order they are defined. Fields are bound when they are assigned a value. For +example, the Sparc target defines the ``XNORrr`` instruction as a ``F3_1`` +format instruction having three operands. + +.. code-block:: llvm + + def XNORrr : F3_1<2, 0b000111, + (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + "xnor $b, $c, $dst", + [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>; + +The instruction templates in ``SparcInstrFormats.td`` show the base class for +``F3_1`` is ``InstSP``. + +.. code-block:: llvm + + class InstSP pattern> : Instruction { + field bits<32> Inst; + let Namespace = "SP"; + bits<2> op; + let Inst{31-30} = op; + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; + } + +``InstSP`` leaves the ``op`` field unbound. + +.. 
code-block:: llvm + + class F3<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSP<outs, ins, asmstr, pattern> { + bits<5> rd; + bits<6> op3; + bits<5> rs1; + let op{1} = 1; // Op = 2 or 3 + let Inst{29-25} = rd; + let Inst{24-19} = op3; + let Inst{18-14} = rs1; + } + +``F3`` binds the ``op`` field and defines the ``rd``, ``op3``, and ``rs1`` +fields. ``F3`` format instructions will bind the operands to the ``rd``, ``op3``, and +``rs1`` fields. + +.. code-block:: llvm + + class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins, + string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> { + bits<8> asi = 0; // asi not currently used + bits<5> rs2; + let op = opVal; + let op3 = op3val; + let Inst{13} = 0; // i field = 0 + let Inst{12-5} = asi; // address space identifier + let Inst{4-0} = rs2; + } + +``F3_1`` binds the ``op3`` field and defines the ``rs2`` field. ``F3_1`` +format instructions will bind the operands to the ``rd``, ``rs1``, and ``rs2`` +fields. This results in the ``XNORrr`` instruction binding ``$dst``, ``$b``, +and ``$c`` operands to the ``rd``, ``rs1``, and ``rs2`` fields respectively. + +Instruction Relation Mapping +---------------------------- + +This TableGen feature is used to relate instructions with each other. It is +particularly useful when you have multiple instruction formats and need to +switch between them after instruction selection. This entire feature is driven +by relation models which can be defined in ``XXXInstrInfo.td`` files +according to the target-specific instruction set. Relation models are defined +using the ``InstrMapping`` class as a base. TableGen parses all the models +and generates instruction relation maps using the specified information. +Relation maps are emitted as tables in the ``XXXGenInstrInfo.inc`` file +along with the functions to query them. For detailed information on how to +use this feature, please refer to :doc:`HowToUseInstrMappings`.
+ +Implement a subclass of ``TargetInstrInfo`` +------------------------------------------- + +The final step is to hand code portions of ``XXXInstrInfo``, which implements +the interface described in ``TargetInstrInfo.h`` (see :ref:`TargetInstrInfo`). +These functions return ``0`` or a Boolean or they assert, unless overridden. +Here's a list of functions that are overridden for the SPARC implementation in +``SparcInstrInfo.cpp``: + +* ``isLoadFromStackSlot`` --- If the specified machine instruction is a direct + load from a stack slot, return the register number of the destination and the + ``FrameIndex`` of the stack slot. + +* ``isStoreToStackSlot`` --- If the specified machine instruction is a direct + store to a stack slot, return the register number of the source and the + ``FrameIndex`` of the stack slot. + +* ``copyPhysReg`` --- Copy values between a pair of physical registers. + +* ``storeRegToStackSlot`` --- Store a register value to a stack slot. + +* ``loadRegFromStackSlot`` --- Load a register value from a stack slot. + +* ``storeRegToAddr`` --- Store a register value to memory. + +* ``loadRegFromAddr`` --- Load a register value from memory. + +* ``foldMemoryOperand`` --- Attempt to combine instructions of any load or + store instruction for the specified operand(s). + +Branch Folding and If Conversion +-------------------------------- + +Performance can be improved by combining instructions or by eliminating +instructions that are never reached. The ``AnalyzeBranch`` method in +``XXXInstrInfo`` may be implemented to examine conditional instructions and +remove unnecessary instructions. ``AnalyzeBranch`` looks at the end of a +machine basic block (MBB) for opportunities for improvement, such as branch +folding and if conversion.
The ``BranchFolder`` and ``IfConverter`` machine +function passes (see the source files ``BranchFolding.cpp`` and +``IfConversion.cpp`` in the ``lib/CodeGen`` directory) call ``AnalyzeBranch`` +to improve the control flow graph that represents the instructions. + +Several implementations of ``AnalyzeBranch`` (for ARM, Alpha, and X86) can be +examined as models for your own ``AnalyzeBranch`` implementation. Since SPARC +does not implement a useful ``AnalyzeBranch``, the ARM target implementation is +shown below. + +``AnalyzeBranch`` returns a Boolean value and takes four parameters: + +* ``MachineBasicBlock &MBB`` --- The incoming block to be examined. + +* ``MachineBasicBlock *&TBB`` --- A destination block that is returned. For a + conditional branch that evaluates to true, ``TBB`` is the destination. + +* ``MachineBasicBlock *&FBB`` --- For a conditional branch that evaluates to + false, ``FBB`` is returned as the destination. + +* ``std::vector<MachineOperand> &Cond`` --- List of operands to evaluate a + condition for a conditional branch. + +In the simplest case, if a block ends without a branch, then it falls through +to the successor block. No destination blocks are specified for either ``TBB`` +or ``FBB``, so both parameters return ``NULL``. The start of the +``AnalyzeBranch`` (see code below for the ARM target) shows the function +parameters and the code for the simplest case. + +.. code-block:: c++ + + bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + std::vector<MachineOperand> &Cond) const + { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + +If a block ends with a single unconditional branch instruction, then +``AnalyzeBranch`` (shown below) should return the destination of that branch in +the ``TBB`` parameter. + +..
code-block:: c++ + + if (LastOpc == ARM::B || LastOpc == ARM::tB) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + +If a block ends with two unconditional branches, then the second branch is +never reached. In that situation, as shown below, remove the last branch +instruction and return the penultimate branch in the ``TBB`` parameter. + +.. code-block:: c++ + + if ((SecondLastOpc == ARM::B || SecondLastOpc == ARM::tB) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + I->eraseFromParent(); + return false; + } + +A block may end with a single conditional branch instruction that falls through +to successor block if the condition evaluates to false. In that case, +``AnalyzeBranch`` (shown below) should return the destination of that +conditional branch in the ``TBB`` parameter and a list of operands in the +``Cond`` parameter to evaluate the condition. + +.. code-block:: c++ + + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + return false; + } + +If a block ends with both a conditional branch and an ensuing unconditional +branch, then ``AnalyzeBranch`` (shown below) should return the conditional +branch destination (assuming it corresponds to a conditional evaluation of +"``true``") in the ``TBB`` parameter and the unconditional branch destination +in the ``FBB`` (corresponding to a conditional evaluation of "``false``"). A +list of operands to evaluate the condition should be returned in the ``Cond`` +parameter. + +.. 
code-block:: c++ + + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || + (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(1)); + Cond.push_back(SecondLastInst->getOperand(2)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + +For the last two cases (ending with a single conditional branch or ending with +one conditional and one unconditional branch), the operands returned in the +``Cond`` parameter can be passed to methods of other instructions to create new +branches or perform other operations. An implementation of ``AnalyzeBranch`` +requires the helper methods ``RemoveBranch`` and ``InsertBranch`` to manage +subsequent operations. + +``AnalyzeBranch`` should return false indicating success in most circumstances. +``AnalyzeBranch`` should only return true when the method is stumped about what +to do, for example, if a block has three terminating branches. +``AnalyzeBranch`` may return true if it encounters a terminator it cannot +handle, such as an indirect branch. + +.. _instruction-selector: + +Instruction Selector +==================== + +LLVM uses a ``SelectionDAG`` to represent LLVM IR instructions, and nodes of +the ``SelectionDAG`` ideally represent native target instructions. During code +generation, instruction selection passes are performed to convert non-native +DAG instructions into native target-specific instructions. The pass described +in ``XXXISelDAGToDAG.cpp`` is used to match patterns and perform DAG-to-DAG +instruction selection. Optionally, a pass may be defined (in +``XXXBranchSelector.cpp``) to perform similar DAG-to-DAG operations for branch +instructions. Later, the code in ``XXXISelLowering.cpp`` replaces or removes +operations and data types not supported natively (legalizes) in a +``SelectionDAG``. 
+ +TableGen generates code for instruction selection using the following target +description input files: + +* ``XXXInstrInfo.td`` --- Contains definitions of instructions in a + target-specific instruction set and generates ``XXXGenDAGISel.inc``, which is + included in ``XXXISelDAGToDAG.cpp``. + +* ``XXXCallingConv.td`` --- Contains the calling and return value conventions + for the target architecture, and it generates ``XXXGenCallingConv.inc``, + which is included in ``XXXISelLowering.cpp``. + +The implementation of an instruction selection pass must include a header that +declares the ``FunctionPass`` class or a subclass of ``FunctionPass``. In +``XXXTargetMachine.cpp``, a Pass Manager (PM) should add each instruction +selection pass into the queue of passes to run. + +The LLVM static compiler (``llc``) is an excellent tool for visualizing the +contents of DAGs. To display the ``SelectionDAG`` before or after specific +processing phases, use the command line options for ``llc``, described at +:ref:`SelectionDAG-Process`. + +To describe instruction selector behavior, you should add patterns for lowering +LLVM code into a ``SelectionDAG`` as the last parameter of the instruction +definitions in ``XXXInstrInfo.td``. For example, in ``SparcInstrInfo.td``, +this entry defines a register store operation, and the last parameter describes +a pattern with the store DAG operator. + +.. code-block:: llvm + + def STrr : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src), + "st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>; + +``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``: + +.. code-block:: llvm + + def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; + +The definition of ``ADDRrr`` refers to ``SelectADDRrr``, which is a function +defined in an implementation of the Instruction Selector (such as +``SparcISelDAGToDAG.cpp``). + +In ``lib/Target/TargetSelectionDAG.td``, the DAG operator for store is defined +below: + +..
code-block:: llvm + + def store : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED; + return false; + }]>; + +``XXXInstrInfo.td`` also generates (in ``XXXGenDAGISel.inc``) the +``SelectCode`` method that is used to call the appropriate processing method +for an instruction. In this example, ``SelectCode`` calls ``Select_ISD_STORE`` +for the ``ISD::STORE`` opcode. + +.. code-block:: c++ + + SDNode *SelectCode(SDValue N) { + ... + MVT::ValueType NVT = N.getNode()->getValueType(0); + switch (N.getOpcode()) { + case ISD::STORE: { + switch (NVT) { + default: + return Select_ISD_STORE(N); + break; + } + break; + } + ... + +The pattern for ``STrr`` is matched, so elsewhere in ``XXXGenDAGISel.inc``, +code for ``STrr`` is created for ``Select_ISD_STORE``. The ``Emit_22`` method +is also generated in ``XXXGenDAGISel.inc`` to complete the processing of this +instruction. + +.. code-block:: c++ + + SDNode *Select_ISD_STORE(const SDValue &N) { + SDValue Chain = N.getOperand(0); + if (Predicate_store(N.getNode())) { + SDValue N1 = N.getOperand(1); + SDValue N2 = N.getOperand(2); + SDValue CPTmp0; + SDValue CPTmp1; + + // Pattern: (st:void i32:i32:$src, + // ADDRrr:i32:$addr)<<P:Predicate_store>> + // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src) + // Pattern complexity = 13 cost = 1 size = 0 + if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) && + N1.getNode()->getValueType(0) == MVT::i32 && + N2.getNode()->getValueType(0) == MVT::i32) { + return Emit_22(N, SP::STrr, CPTmp0, CPTmp1); + } + ... + +The SelectionDAG Legalize Phase +------------------------------- + +The Legalize phase converts a DAG to use types and operations that are natively +supported by the target. For natively unsupported types and operations, you +need to add code to the target-specific ``XXXTargetLowering`` implementation to +convert unsupported types and operations to supported ones.
+ +In the constructor for the ``XXXTargetLowering`` class, first use the +``addRegisterClass`` method to specify which types are supported and which +register classes are associated with them. The code for the register classes +is generated by TableGen from ``XXXRegisterInfo.td`` and placed in +``XXXGenRegisterInfo.h.inc``. For example, the implementation of the +constructor for the SparcTargetLowering class (in ``SparcISelLowering.cpp``) +starts with the following code: + +.. code-block:: c++ + + addRegisterClass(MVT::i32, SP::IntRegsRegisterClass); + addRegisterClass(MVT::f32, SP::FPRegsRegisterClass); + addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); + +You should examine the node types in the ``ISD`` namespace +(``include/llvm/CodeGen/SelectionDAGNodes.h``) and determine which operations +the target natively supports. For operations that do **not** have native +support, add a callback to the constructor for the ``XXXTargetLowering`` class, +so the instruction selection process knows what to do. The ``TargetLowering`` +class callback methods (declared in ``llvm/Target/TargetLowering.h``) are: + +* ``setOperationAction`` --- General operation. +* ``setLoadExtAction`` --- Load with extension. +* ``setTruncStoreAction`` --- Truncating store. +* ``setIndexedLoadAction`` --- Indexed load. +* ``setIndexedStoreAction`` --- Indexed store. +* ``setConvertAction`` --- Type conversion. +* ``setCondCodeAction`` --- Support for a given condition code. + +Note: on older releases, ``setLoadXAction`` is used instead of +``setLoadExtAction``. Also, on older releases, ``setCondCodeAction`` may not +be supported. Examine your release to see what methods are specifically +supported. + +These callbacks are used to determine that an operation does or does not work +with a specified type (or types). And in all cases, the third parameter is a +``LegalizeAction`` type enum value: ``Promote``, ``Expand``, ``Custom``, or +``Legal``.
``SparcISelLowering.cpp`` contains examples of all four +``LegalizeAction`` values. + +Promote +^^^^^^^ + +For an operation without native support for a given type, the specified type +may be promoted to a larger type that is supported. For example, SPARC does +not support a sign-extending load for Boolean values (``i1`` type), so in +``SparcISelLowering.cpp`` the third parameter below, ``Promote``, changes +``i1`` type values to a larger type before loading. + +.. code-block:: c++ + + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + +Expand +^^^^^^ + +For a type without native support, a value may need to be broken down further, +rather than promoted. For an operation without native support, a combination +of other operations may be used to similar effect. In SPARC, the +floating-point sine and cosine trig operations are supported by expansion to +other operations, as indicated by the third parameter, ``Expand``, to +``setOperationAction``: + +.. code-block:: c++ + + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + +Custom +^^^^^^ + +For some operations, simple type promotion or operation expansion may be +insufficient. In some cases, a special intrinsic function must be implemented. + +For example, a constant value may require special treatment, or an operation +may require spilling and restoring registers in the stack and working with +register allocators. + +As seen in ``SparcISelLowering.cpp`` code below, to perform a type conversion +from a floating point value to a signed integer, first the +``setOperationAction`` should be called with ``Custom`` as the third parameter: + +.. code-block:: c++ + + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + +In the ``LowerOperation`` method, for each ``Custom`` operation, a case +statement should be added to indicate what function to call. In the following +code, an ``FP_TO_SINT`` opcode will call the ``LowerFP_TO_SINT`` method: + +..
code-block:: c++ + + SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); + ... + } + } + +Finally, the ``LowerFP_TO_SINT`` method is implemented, using an FP register to +convert the floating-point value to an integer. + +.. code-block:: c++ + + static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { + assert(Op.getValueType() == MVT::i32); + Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BITCAST, MVT::i32, Op); + } + +Legal +^^^^^ + +The ``Legal`` ``LegalizeAction`` enum value simply indicates that an operation +**is** natively supported. ``Legal`` represents the default condition, so it +is rarely used. In ``SparcISelLowering.cpp``, the action for ``CTPOP`` (an +operation to count the bits set in an integer) is natively supported only for +SPARC v9. The following code enables the ``Expand`` conversion technique for +non-v9 SPARC implementations. + +.. code-block:: c++ + + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + ... + if (TM.getSubtarget<SparcSubtarget>().isV9()) + setOperationAction(ISD::CTPOP, MVT::i32, Legal); + +Calling Conventions +------------------- + +To support target-specific calling conventions, ``XXXCallingConv.td`` uses +interfaces (such as ``CCIfType`` and ``CCAssignToReg``) that are defined in +``lib/Target/TargetCallingConv.td``. TableGen can take the target descriptor +file ``XXXCallingConv.td`` and generate the header file +``XXXGenCallingConv.inc``, which is typically included in +``XXXISelLowering.cpp``. You can use the interfaces in +``TargetCallingConv.td`` to specify: + +* The order of parameter allocation. + +* Where parameters and return values are placed (that is, on the stack or in + registers). + +* Which registers may be used. + +* Whether the caller or callee unwinds the stack. + +The following example demonstrates the use of the ``CCIfType`` and +``CCAssignToReg`` interfaces.
If the ``CCIfType`` predicate is true (that is, +if the current argument is of type ``f32`` or ``f64``), then the action is +performed. In this case, the ``CCAssignToReg`` action assigns the argument +value to the first available register: either ``R0`` or ``R1``. + +.. code-block:: llvm + + CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>> + +``SparcCallingConv.td`` contains definitions for a target-specific return-value +calling convention (``RetCC_Sparc32``) and a basic 32-bit C calling convention +(``CC_Sparc32``). The definition of ``RetCC_Sparc32`` (shown below) indicates +which registers are used for specified scalar return types. A single-precision +float is returned to register ``F0``, and a double-precision float goes to +register ``D0``. A 32-bit integer is returned in register ``I0`` or ``I1``. + +.. code-block:: llvm + + def RetCC_Sparc32 : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[I0, I1]>>, + CCIfType<[f32], CCAssignToReg<[F0]>>, + CCIfType<[f64], CCAssignToReg<[D0]>> + ]>; + +The definition of ``CC_Sparc32`` in ``SparcCallingConv.td`` introduces +``CCAssignToStack``, which assigns the value to a stack slot with the specified +size and alignment. In the example below, the first parameter, 4, indicates +the size of the slot, and the second parameter, also 4, indicates the stack +alignment along 4-byte units. (Special cases: if size is zero, then the ABI +size is used; if alignment is zero, then the ABI alignment is used.) + +.. code-block:: llvm + + def CC_Sparc32 : CallingConv<[ + // All arguments get passed in integer registers if there is space. + CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, + CCAssignToStack<4, 4> + ]>; + +``CCDelegateTo`` is another commonly used interface, which tries to find a +specified sub-calling convention, and, if a match is found, it is invoked. In +the following example (in ``X86CallingConv.td``), the definition of +``RetCC_X86_32_C`` ends with ``CCDelegateTo``. 
After the current value is +assigned to the register ``ST0`` or ``ST1``, the ``RetCC_X86Common`` is +invoked. + +.. code-block:: llvm + + def RetCC_X86_32_C : CallingConv<[ + CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>, + CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>, + CCDelegateTo<RetCC_X86Common> + ]>; + +``CCIfCC`` is an interface that attempts to match the given name to the current +calling convention. If the name identifies the current calling convention, +then a specified action is invoked. In the following example (in +``X86CallingConv.td``), if the ``Fast`` calling convention is in use, then +``RetCC_X86_32_Fast`` is invoked. If the ``SSECall`` calling convention is in +use, then ``RetCC_X86_32_SSE`` is invoked. + +.. code-block:: llvm + + def RetCC_X86_32 : CallingConv<[ + CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>, + CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo<RetCC_X86_32_SSE>>, + CCDelegateTo<RetCC_X86_32_C> + ]>; + +Other calling convention interfaces include: + +* ``CCIf <predicate, action>`` --- If the predicate matches, apply the action. + +* ``CCIfInReg <action>`` --- If the argument is marked with the "``inreg``" + attribute, then apply the action. + +* ``CCIfNest <action>`` --- If the argument is marked with the "``nest``" + attribute, then apply the action. + +* ``CCIfNotVarArg <action>`` --- If the current function does not take a + variable number of arguments, apply the action. + +* ``CCAssignToRegWithShadow <registerList, shadowList>`` --- similar to + ``CCAssignToReg``, but with a shadow list of registers. + +* ``CCPassByVal <size, align>`` --- Assign value to a stack slot with the + minimum specified size and alignment. + +* ``CCPromoteToType <type>`` --- Promote the current value to the specified + type. + +* ``CallingConv <[actions]>`` --- Define each calling convention that is + supported. + +Assembly Printer +================ + +During the code emission stage, the code generator may utilize an LLVM pass to +produce assembly output.
To do this, you want to implement the code for a +printer that converts LLVM IR to a GAS-format assembly language for your target +machine, using the following steps: + +* Define all the assembly strings for your target, adding them to the + instructions defined in the ``XXXInstrInfo.td`` file. (See + :ref:`instruction-set`.) TableGen will produce an output file + (``XXXGenAsmWriter.inc``) with an implementation of the ``printInstruction`` + method for the ``XXXAsmPrinter`` class. + +* Write ``XXXTargetAsmInfo.h``, which contains the bare-bones declaration of + the ``XXXTargetAsmInfo`` class (a subclass of ``TargetAsmInfo``). + +* Write ``XXXTargetAsmInfo.cpp``, which contains target-specific values for + ``TargetAsmInfo`` properties and sometimes new implementations for methods. + +* Write ``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that + performs the LLVM-to-assembly conversion. + +The code in ``XXXTargetAsmInfo.h`` is usually a trivial declaration of the +``XXXTargetAsmInfo`` class for use in ``XXXTargetAsmInfo.cpp``. Similarly, +``XXXTargetAsmInfo.cpp`` usually has a few declarations of ``XXXTargetAsmInfo`` +replacement values that override the default values in ``TargetAsmInfo.cpp``. +For example, in ``SparcTargetAsmInfo.cpp``: + +.. code-block:: c++ + + SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; // .xword is only supported by V9. + ZeroDirective = "\t.skip\t"; + CommentString = "!"; + ConstantPoolSection = "\t.section \".rodata\",#alloc\n"; + } + +The X86 assembly printer implementation (``X86TargetAsmInfo``) is an example +where the target-specific ``TargetAsmInfo`` class uses an overridden method: +``ExpandInlineAsm``. + +A target-specific implementation of ``AsmPrinter`` is written in +``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that converts +the LLVM to printable assembly.
The implementation must include the following +headers that have declarations for the ``AsmPrinter`` and +``MachineFunctionPass`` classes. The ``MachineFunctionPass`` is a subclass of +``FunctionPass``. + +.. code-block:: c++ + + #include "llvm/CodeGen/AsmPrinter.h" + #include "llvm/CodeGen/MachineFunctionPass.h" + +As a ``FunctionPass``, ``AsmPrinter`` first calls ``doInitialization`` to set +up the ``AsmPrinter``. In ``SparcAsmPrinter``, a ``Mangler`` object is +instantiated to process variable names. + +In ``XXXAsmPrinter.cpp``, the ``runOnMachineFunction`` method (declared in +``MachineFunctionPass``) must be implemented for ``XXXAsmPrinter``. In +``MachineFunctionPass``, the ``runOnFunction`` method invokes +``runOnMachineFunction``. Target-specific implementations of +``runOnMachineFunction`` differ, but generally do the following to process each +machine function: + +* Call ``SetupMachineFunction`` to perform initialization. + +* Call ``EmitConstantPool`` to print out (to the output stream) constants which + have been spilled to memory. + +* Call ``EmitJumpTableInfo`` to print out jump tables used by the current + function. + +* Print out the label for the current function. + +* Print out the code for the function, including basic block labels and the + assembly for the instruction (using ``printInstruction``) + +The ``XXXAsmPrinter`` implementation must also include the code generated by +TableGen that is output in the ``XXXGenAsmWriter.inc`` file. 
The code in +``XXXGenAsmWriter.inc`` contains an implementation of the ``printInstruction`` +method that may call these methods: + +* ``printOperand`` +* ``printMemOperand`` +* ``printCCOperand`` (for conditional statements) +* ``printDataDirective`` +* ``printDeclare`` +* ``printImplicitDef`` +* ``printInlineAsm`` + +The implementations of ``printDeclare``, ``printImplicitDef``, +``printInlineAsm``, and ``printLabel`` in ``AsmPrinter.cpp`` are generally +adequate for printing assembly and do not need to be overridden. + +The ``printOperand`` method is implemented with a long ``switch``/``case`` +statement for the type of operand: register, immediate, basic block, external +symbol, global address, constant pool index, or jump table index. For an +instruction with a memory address operand, the ``printMemOperand`` method +should be implemented to generate the proper output. Similarly, +``printCCOperand`` should be used to print a conditional operand. + +``doFinalization`` should be overridden in ``XXXAsmPrinter``, and it should be +called to shut down the assembly printer. During ``doFinalization``, global +variables and constants are printed to output. + +Subtarget Support +================= + +Subtarget support is used to inform the code generation process of instruction +set variations for a given chip set. For example, the LLVM SPARC +implementation provided covers three major versions of the SPARC microprocessor +architecture: Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a +64-bit architecture), and the UltraSPARC architecture. V8 has 16 +double-precision floating-point registers that are also usable as either 32 +single-precision or 8 quad-precision registers. V8 is also purely big-endian. +V9 has 32 double-precision floating-point registers that are also usable as 16 +quad-precision registers, but cannot be used as single-precision registers. +The UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set +extensions. 
+ +If subtarget support is needed, you should implement a target-specific +``XXXSubtarget`` class for your architecture. This class should process the +command-line options ``-mcpu=`` and ``-mattr=``. + +TableGen uses definitions in the ``Target.td`` and ``Sparc.td`` files to +generate code in ``SparcGenSubtarget.inc``. In ``Target.td``, shown below, the +``SubtargetFeature`` interface is defined. The first 4 string parameters of +the ``SubtargetFeature`` interface are a feature name, an attribute set by the +feature, the value of the attribute, and a description of the feature. (The +fifth parameter is a list of features whose presence is implied, and its +default value is an empty array.) + +.. code-block:: llvm + + class SubtargetFeature<string n, string a, string v, string d, + list<SubtargetFeature> i = []> { + string Name = n; + string Attribute = a; + string Value = v; + string Desc = d; + list<SubtargetFeature> Implies = i; + } + +In the ``Sparc.td`` file, the ``SubtargetFeature`` is used to define the +following features. + +.. code-block:: llvm + + def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true", + "Enable SPARC-V9 instructions">; + def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8", + "V8DeprecatedInsts", "true", + "Enable deprecated V8 instructions in V9 mode">; + def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true", + "Enable UltraSPARC Visual Instruction Set extensions">; + +Elsewhere in ``Sparc.td``, the ``Proc`` class is defined and then is used to +define particular SPARC processor subtypes that may have the previously +described features. + +..
code-block:: llvm + + class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + + def : Proc<"generic", []>; + def : Proc<"v8", []>; + def : Proc<"supersparc", []>; + def : Proc<"sparclite", []>; + def : Proc<"f934", []>; + def : Proc<"hypersparc", []>; + def : Proc<"sparclite86x", []>; + def : Proc<"sparclet", []>; + def : Proc<"tsc701", []>; + def : Proc<"v9", [FeatureV9]>; + def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>; + def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>; + def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>; + +From the ``Target.td`` and ``Sparc.td`` files, the resulting +``SparcGenSubtarget.inc`` specifies enum values to identify the features, +arrays of constants to represent the CPU features and CPU subtypes, and the +``ParseSubtargetFeatures`` method that parses the features string that sets +specified subtarget options. The generated ``SparcGenSubtarget.inc`` file +should be included in the ``SparcSubtarget.cpp``. The target-specific +implementation of the ``XXXSubtarget`` constructor should follow this pseudocode: + +.. code-block:: c++ + + XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) { + // Set the default features + // Determine default and user specified characteristics of the CPU + // Call ParseSubtargetFeatures(FS, CPU) to parse the features string + // Perform any additional operations + } + +JIT Support +=========== + +The implementation of a target machine optionally includes a Just-In-Time (JIT) +code generator that emits machine code and auxiliary structures as binary +output that can be written directly to memory. To do this, implement JIT code +generation by performing the following steps: + +* Write an ``XXXCodeEmitter.cpp`` file that contains a machine function pass + that transforms target-machine instructions into relocatable machine + code.
+ +* Write an ``XXXJITInfo.cpp`` file that implements the JIT interfaces for + target-specific code-generation activities, such as emitting machine code and + stubs. + +* Modify ``XXXTargetMachine`` so that it provides a ``TargetJITInfo`` object + through its ``getJITInfo`` method. + +There are several different approaches to writing the JIT support code. For +instance, TableGen and target descriptor files may be used for creating a JIT +code generator, but are not mandatory. For the Alpha and PowerPC target +machines, TableGen is used to generate ``XXXGenCodeEmitter.inc``, which +contains the binary coding of machine instructions and the +``getBinaryCodeForInstr`` method to access those codes. Other JIT +implementations do not. + +Both ``XXXJITInfo.cpp`` and ``XXXCodeEmitter.cpp`` must include the +``llvm/CodeGen/MachineCodeEmitter.h`` header file that defines the +``MachineCodeEmitter`` class containing code for several callback functions +that write data (in bytes, words, strings, etc.) to the output stream. + +Machine Code Emitter +-------------------- + +In ``XXXCodeEmitter.cpp``, a target-specific version of the ``Emitter`` class is +implemented as a function pass (subclass of ``MachineFunctionPass``). The +target-specific implementation of ``runOnMachineFunction`` (invoked by +``runOnFunction`` in ``MachineFunctionPass``) iterates through the +``MachineBasicBlock`` and calls ``emitInstruction`` to process each instruction and +emit binary code. ``emitInstruction`` is largely implemented with case +statements on the instruction types defined in ``XXXInstrInfo.h``. For +example, in ``X86CodeEmitter.cpp``, the ``emitInstruction`` method is built +around the following ``switch``/``case`` statements: + +.. code-block:: c++ + + switch (Desc->TSFlags & X86::FormMask) { + case X86II::Pseudo: // for not yet implemented instructions + ... // or pseudo-instructions + break; + case X86II::RawFrm: // for instructions with a fixed opcode value + ...
+ break; + case X86II::AddRegFrm: // for instructions that have one register operand + ... // added to their opcode + break; + case X86II::MRMDestReg:// for instructions that use the Mod/RM byte + ... // to specify a destination (register) + break; + case X86II::MRMDestMem:// for instructions that use the Mod/RM byte + ... // to specify a destination (memory) + break; + case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte + ... // to specify a source (register) + break; + case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte + ... // to specify a source (memory) + break; + case X86II::MRM0r: case X86II::MRM1r: // for instructions that operate on + case X86II::MRM2r: case X86II::MRM3r: // a REGISTER r/m operand and + case X86II::MRM4r: case X86II::MRM5r: // use the Mod/RM byte and a field + case X86II::MRM6r: case X86II::MRM7r: // to hold extended opcode data + ... + break; + case X86II::MRM0m: case X86II::MRM1m: // for instructions that operate on + case X86II::MRM2m: case X86II::MRM3m: // a MEMORY r/m operand and + case X86II::MRM4m: case X86II::MRM5m: // use the Mod/RM byte and a field + case X86II::MRM6m: case X86II::MRM7m: // to hold extended opcode data + ... + break; + case X86II::MRMInitReg: // for instructions whose source and + ... // destination are the same register + break; + } + +The implementations of these case statements often first emit the opcode and +then get the operand(s). Then depending upon the operand, helper methods may +be called to process the operand(s). For example, in ``X86CodeEmitter.cpp``, +for the ``X86II::AddRegFrm`` case, the first data emitted (by ``emitByte``) is +the opcode added to the register operand. Then an object representing the +machine operand, ``MO1``, is extracted. The helper methods such as +``isImmediate``, ``isGlobalAddress``, ``isExternalSymbol``, +``isConstantPoolIndex``, and ``isJumpTableIndex`` determine the operand type. 
+(``X86CodeEmitter.cpp`` also has private methods such as ``emitConstant``, +``emitGlobalAddress``, ``emitExternalSymbolAddress``, ``emitConstPoolAddress``, +and ``emitJumpTableAddress`` that emit the data into the output stream.) + +.. code-block:: c++ + + case X86II::AddRegFrm: + MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImmediate()) + emitConstant(MO1.getImm(), Size); + else { + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri) + rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? + if (MO1.isGlobalAddress()) { + bool NeedStub = isa<Function>(MO1.getGlobal()); + bool isLazy = gvNeedsLazyPtr(MO1.getGlobal()); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, isLazy); + } else if (MO1.isExternalSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isConstantPoolIndex()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJumpTableIndex()) + emitJumpTableAddress(MO1.getIndex(), rt); + } + } + break; + +In the previous example, ``XXXCodeEmitter.cpp`` uses the variable ``rt``, which +is a ``RelocationType`` enum that may be used to relocate addresses (for +example, a global address with a PIC base offset). The ``RelocationType`` enum +for that target is defined in the short target-specific ``XXXRelocations.h`` +file. The ``RelocationType`` is used by the ``relocate`` method defined in +``XXXJITInfo.cpp`` to rewrite addresses for referenced global symbols. + +For example, ``X86Relocations.h`` specifies the following relocation types for +the X86 addresses. In all four cases, the relocated value is added to the +value already in memory. For ``reloc_pcrel_word`` and ``reloc_picrel_word``, +there is an additional initial adjustment. + +..
code-block:: c++ + + enum RelocationType { + reloc_pcrel_word = 0, // add reloc value after adjusting for the PC loc + reloc_picrel_word = 1, // add reloc value after adjusting for the PIC base + reloc_absolute_word = 2, // absolute relocation; no additional adjustment + reloc_absolute_dword = 3 // absolute relocation; no additional adjustment + }; + +Target JIT Info +--------------- + +``XXXJITInfo.cpp`` implements the JIT interfaces for target-specific +code-generation activities, such as emitting machine code and stubs. At +minimum, a target-specific version of ``XXXJITInfo`` implements the following: + +* ``getLazyResolverFunction`` --- Initializes the JIT, gives the target a + function that is used for compilation. + +* ``emitFunctionStub`` --- Returns a native function with a specified address + for a callback function. + +* ``relocate`` --- Changes the addresses of referenced globals, based on + relocation types. + +* Callback function that are wrappers to a function stub that is used when the + real target is not initially known. + +``getLazyResolverFunction`` is generally trivial to implement. It makes the +incoming parameter as the global ``JITCompilerFunction`` and returns the +callback function that will be used a function wrapper. For the Alpha target +(in ``AlphaJITInfo.cpp``), the ``getLazyResolverFunction`` implementation is +simply: + +.. code-block:: c++ + + TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction( + JITCompilerFn F) { + JITCompilerFunction = F; + return AlphaCompilationCallback; + } + +For the X86 target, the ``getLazyResolverFunction`` implementation is a little +more complicated, because it returns a different callback function for +processors with SSE instructions and XMM registers. + +The callback function initially saves and later restores the callee register +values, incoming arguments, and frame and return address. 
The callback +function needs low-level access to the registers or stack, so it is typically +implemented with assembler. + diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html deleted file mode 100644 index 149b103097ff..000000000000 --- a/docs/WritingAnLLVMPass.html +++ /dev/null @@ -1,1954 +0,0 @@ - - - - - Writing an LLVM Pass - - - - -

- Writing an LLVM Pass -

- -
    -
  1. Introduction - What is a pass?
  2. -
  3. Quick Start - Writing hello world -
  4. -
  5. Pass classes and requirements - -
  6. Pass Registration -
  7. -
  8. Specifying interactions between passes -
  9. -
  10. Implementing Analysis Groups -
  11. -
  12. Pass Statistics -
  13. What PassManager does -
  14. -
  15. Registering dynamically loaded passes -
  16. -
  17. Using GDB with dynamically loaded passes -
  18. -
  19. Future extensions planned -
  20. -
- -
-

Written by Chris Lattner and - Jim Laskey

-
- - -

- Introduction - What is a pass? -

- - -
- -

The LLVM Pass Framework is an important part of the LLVM system, because LLVM -passes are where most of the interesting parts of the compiler exist. Passes -perform the transformations and optimizations that make up the compiler, they -build the analysis results that are used by these transformations, and they are, -above all, a structuring technique for compiler code.

- -

All LLVM passes are subclasses of the Pass -class, which implement functionality by overriding virtual methods inherited -from Pass. Depending on how your pass works, you should inherit from -the ModulePass, CallGraphSCCPass, FunctionPass, LoopPass, RegionPass, or BasicBlockPass classes, which gives the system -more information about what your pass does, and how it can be combined with -other passes. One of the main features of the LLVM Pass Framework is that it -schedules passes to run in an efficient way based on the constraints that your -pass meets (which are indicated by which class they derive from).

- -

We start by showing you how to construct a pass, everything from setting up -the code, to compiling, loading, and executing it. After the basics are down, -more advanced features are discussed.

- -
- - -

- Quick Start - Writing hello world -

- - -
- -

Here we describe how to write the "hello world" of passes. The "Hello" pass -is designed to simply print out the name of non-external functions that exist in -the program being compiled. It does not modify the program at all, it just -inspects it. The source code and files for this pass are available in the LLVM -source tree in the lib/Transforms/Hello directory.

- - -

- Setting up the build environment -

- -
- -

First, configure and build LLVM. This needs to be done directly inside the - LLVM source tree rather than in a separate objects directory. - Next, you need to create a new directory somewhere in the LLVM source - base. For this example, we'll assume that you made - lib/Transforms/Hello. Finally, you must set up a build script - (Makefile) that will compile the source code for the new pass. To do this, - copy the following into Makefile:

-
- -
-# Makefile for hello pass
-
-# Path to top level of LLVM hierarchy
-LEVEL = ../../..
-
-# Name of the library to build
-LIBRARYNAME = Hello
-
-# Make the shared library become a loadable module so the tools can 
-# dlopen/dlsym on the resulting library.
-LOADABLE_MODULE = 1
-
-# Include the makefile implementation stuff
-include $(LEVEL)/Makefile.common
-
- -

This makefile specifies that all of the .cpp files in the current -directory are to be compiled and linked together into a shared object -$(LEVEL)/Debug+Asserts/lib/Hello.so that can be dynamically loaded by -the opt or bugpoint tools via their -load options. -If your operating system uses a suffix other than .so (such as windows or -Mac OS/X), the appropriate extension will be used.

- -

If you use CMake to build LLVM, see -Developing an LLVM pass with CMake.

- -

Now that we have the build scripts set up, we just need to write the code for -the pass itself.

- -
- - -

- Basic code required -

- -
- -

Now that we have a way to compile our new pass, we just have to write it. -Start out with:

- -
-
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
-#include "llvm/Support/raw_ostream.h"
-
-
- -

Which are needed because we are writing a Pass, -we are operating on Function's, -and we will be doing some printing.

- -

Next we have:

- -
-
-using namespace llvm;
-
-
- -

... which is required because the functions from the include files -live in the llvm namespace.

- -

Next we have:

- -
-
-namespace {
-
-
- -

... which starts out an anonymous namespace. Anonymous namespaces are to C++ -what the "static" keyword is to C (at global scope). It makes the -things declared inside of the anonymous namespace visible only to the current -file. If you're not familiar with them, consult a decent C++ book for more -information.

- -

Next, we declare our pass itself:

- -
-
-  struct Hello : public FunctionPass {
-
-
- -

This declares a "Hello" class that is a subclass of FunctionPass. -The different builtin pass subclasses are described in detail later, but for now, know that FunctionPass's operate on a function at a -time.

- -
-
-    static char ID;
-    Hello() : FunctionPass(ID) {}
-
-
- -

This declares the pass identifier used by LLVM to identify the pass. This allows LLVM -to avoid using expensive C++ runtime information.

- -
-
-    virtual bool runOnFunction(Function &F) {
-      errs() << "Hello: ";
-      errs().write_escaped(F.getName()) << "\n";
-      return false;
-    }
-  };  // end of struct Hello
-}  // end of anonymous namespace
-
-
- -

We declare a "runOnFunction" method, -which overloads an abstract virtual method inherited from FunctionPass. This is where we are supposed -to do our thing, so we just print out our message with the name of each -function.

- -
-
-char Hello::ID = 0;
-
-
- -

We initialize the pass ID here. LLVM uses ID's address to identify a pass, so -the initialization value is not important.

- -
-
-static RegisterPass<Hello> X("hello", "Hello World Pass",
-                             false /* Only looks at CFG */,
-                             false /* Analysis Pass */);
-
-
- -

Lastly, we register our class Hello, -giving it a command line argument "hello", and a name "Hello World -Pass". The last two arguments describe its behavior: if a pass walks CFG -without modifying it then the third argument is set to true; if a pass -is an analysis pass, for example dominator tree pass, then true is -supplied as the fourth argument.

- -

As a whole, the .cpp file looks like:

- -
-
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-  struct Hello : public FunctionPass {
-    
-    static char ID;
-    Hello() : FunctionPass(ID) {}
-
-    virtual bool runOnFunction(Function &F) {
-      errs() << "Hello: ";
-      errs().write_escaped(F.getName()) << '\n';
-      return false;
-    }
-
-  };
-}
-  
-char Hello::ID = 0;
-static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
-
-
- -

Now that it's all together, compile the file with a simple "gmake" -command in the local directory and you should get a new file -"Debug+Asserts/lib/Hello.so" under the top level directory of the LLVM -source tree (not in the local directory). Note that everything in this file is -contained in an anonymous namespace — this reflects the fact that passes -are self contained units that do not need external interfaces (although they can -have them) to be useful.

- -
- - -

- Running a pass with opt -

- -
- -

Now that you have a brand new shiny shared object file, we can use the -opt command to run an LLVM program through your pass. Because you -registered your pass with RegisterPass, you will be able to -use the opt tool to access it, once loaded.

- -

To test it, follow the example at the end of the Getting Started Guide to compile "Hello World" to -LLVM. We can now run the bitcode file (hello.bc) for the program -through our transformation like this (of course, any bitcode file will -work):

- -
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null
-Hello: __main
-Hello: puts
-Hello: main
-
- -

The '-load' option specifies that 'opt' should load your -pass as a shared object, which makes '-hello' a valid command line -argument (which is one reason you need to register your -pass). Because the hello pass does not modify the program in any -interesting way, we just throw away the result of opt (sending it to -/dev/null).

- -

To see what happened to the other string you registered, try running -opt with the -help option:

- -
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -help
-OVERVIEW: llvm .bc -> .bc modular optimizer
-
-USAGE: opt [options] <input bitcode>
-
-OPTIONS:
-  Optimizations available:
-...
-    -globalopt                - Global Variable Optimizer
-    -globalsmodref-aa         - Simple mod/ref analysis for globals
-    -gvn                      - Global Value Numbering
-    -hello                    - Hello World Pass
-    -indvars                  - Induction Variable Simplification
-    -inline                   - Function Integration/Inlining
-    -insert-edge-profiling    - Insert instrumentation for edge profiling
-...
-
- -

The pass name gets added as the information string for your pass, giving some -documentation to users of opt. Now that you have a working pass, you -would go ahead and make it do the cool transformations you want. Once you get -it all working and tested, it may become useful to find out how fast your pass -is. The PassManager provides a nice command -line option (--time-passes) that allows you to get information about -the execution time of your pass along with the other passes you queue up. For -example:

- -
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null
-Hello: __main
-Hello: puts
-Hello: main
-===============================================================================
-                      ... Pass execution timing report ...
-===============================================================================
-  Total Execution Time: 0.02 seconds (0.0479059 wall clock)
-
-   ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Pass Name ---
-   0.0100 (100.0%)   0.0000 (  0.0%)   0.0100 ( 50.0%)   0.0402 ( 84.0%)  Bitcode Writer
-   0.0000 (  0.0%)   0.0100 (100.0%)   0.0100 ( 50.0%)   0.0031 (  6.4%)  Dominator Set Construction
-   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0013 (  2.7%)  Module Verifier
-   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0033 (  6.9%)  Hello World Pass
-   0.0100 (100.0%)   0.0100 (100.0%)   0.0200 (100.0%)   0.0479 (100.0%)  TOTAL
-
- -

As you can see, our implementation above is pretty fast :). The additional -passes listed are automatically inserted by the 'opt' tool to verify -that the LLVM emitted by your pass is still valid and well formed LLVM, which -hasn't been broken somehow.

- -

Now that you have seen the basics of the mechanics behind passes, we can talk -about some more details of how they work and how to use them.

- -
- -
- - -

- Pass classes and requirements -

- - -
- -

One of the first things that you should do when designing a new pass is to -decide what class you should subclass for your pass. The Hello World example uses the FunctionPass class for its implementation, but we -did not discuss why or when this should occur. Here we talk about the classes -available, from the most general to the most specific.

- -

When choosing a superclass for your Pass, you should choose the most -specific class possible, while still being able to meet the requirements -listed. This gives the LLVM Pass Infrastructure information necessary to -optimize how passes are run, so that the resultant compiler isn't unnecessarily -slow.

- - -

- The ImmutablePass class -

- -
- -

The most plain and boring type of pass is the "ImmutablePass" -class. This pass type is used for passes that do not have to be run, do not -change state, and never need to be updated. This is not a normal type of -transformation or analysis, but can provide information about the current -compiler configuration.

- -

Although this pass class is very infrequently used, it is important for -providing information about the current target machine being compiled for, and -other static information that can affect the various transformations.

- -

ImmutablePasses never invalidate other transformations, are never -invalidated, and are never "run".

- -
- - -

- The ModulePass class -

- -
- -

The "ModulePass" -class is the most general of all superclasses that you can use. Deriving from -ModulePass indicates that your pass uses the entire program as a unit, -referring to function bodies in no predictable order, or adding and removing -functions. Because nothing is known about the behavior of ModulePass -subclasses, no optimization can be done for their execution.

- -

A module pass can use function level passes (e.g. dominators) using -the getAnalysis interface -getAnalysis<DominatorTree>(llvm::Function *) to provide the -function to retrieve analysis result for, if the function pass does not require -any module or immutable passes. Note that this can only be done for functions for which the -analysis ran, e.g. in the case of dominators you should only ask for the -DominatorTree for function definitions, not declarations.

- -

To write a correct ModulePass subclass, derive from -ModulePass and overload the runOnModule method with the -following signature:

- - -

- The runOnModule method -

- -
- -
-virtual bool runOnModule(Module &M) = 0;
-
- -

The runOnModule method performs the interesting work of the pass. -It should return true if the module was modified by the transformation and -false otherwise.

- -
- -
- - -

- The CallGraphSCCPass class -

- -
- -

The "CallGraphSCCPass" -is used by passes that need to traverse the program bottom-up on the call graph -(callees before callers). Deriving from CallGraphSCCPass provides some -mechanics for building and traversing the CallGraph, but also allows the system -to optimize execution of CallGraphSCCPass's. If your pass meets the -requirements outlined below, and doesn't meet the requirements of a FunctionPass or BasicBlockPass, you should derive from -CallGraphSCCPass.

- -

TODO: explain briefly what SCC, Tarjan's algo, and B-U mean.

- -

To be explicit, CallGraphSCCPass subclasses are:

- -
    - -
  1. ... not allowed to inspect or modify any Functions other -than those in the current SCC and the direct callers and direct callees of the -SCC.
  2. - -
  3. ... required to preserve the current CallGraph object, updating it -to reflect any changes made to the program.
  4. - -
  5. ... not allowed to add or remove SCC's from the current Module, -though they may change the contents of an SCC.
  6. - -
  7. ... allowed to add or remove global variables from the current -Module.
  8. - -
  9. ... allowed to maintain state across invocations of - runOnSCC (including global data).
  10. -
- -

Implementing a CallGraphSCCPass is slightly tricky in some cases -because it has to handle SCCs with more than one node in it. All of the virtual -methods described below should return true if they modified the program, or -false if they didn't.

- - -

- - The doInitialization(CallGraph &) method - -

- -
- -
-virtual bool doInitialization(CallGraph &CG);
-
- -

The doInitialization method is allowed to do most of the things that -CallGraphSCCPass's are not allowed to do. It can add and remove -functions, get pointers to functions, etc. The doInitialization method -is designed to do simple initialization type of stuff that does not depend on -the SCCs being processed. The doInitialization method call is not -scheduled to overlap with any other pass executions (thus it should be very -fast).

- -
- - -

- The runOnSCC method -

- -
- -
-virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
-
- -

The runOnSCC method performs the interesting work of the pass, and -should return true if the module was modified by the transformation, false -otherwise.

- -
- - -

- - The doFinalization(CallGraph &) method - -

- -
- -
-virtual bool doFinalization(CallGraph &CG);
-
- -

The doFinalization method is an infrequently used method that is -called when the pass framework has finished calling runOnSCC for every SCC in the -program being compiled.

- -
- -
- - -

- The FunctionPass class -

- -
- -

In contrast to ModulePass subclasses, FunctionPass -subclasses do have a predictable, local behavior that can be expected by the -system. All FunctionPass execute on each function in the program -independent of all of the other functions in the program. -FunctionPass's do not require that they are executed in a particular -order, and FunctionPass's do not modify external functions.

- -

To be explicit, FunctionPass subclasses are not allowed to:

- -
    -
  1. Modify a Function other than the one currently being processed.
  2. -
  3. Add or remove Function's from the current Module.
  4. -
  5. Add or remove global variables from the current Module.
  6. -
  7. Maintain state across invocations of - runOnFunction (including global data)
  8. -
- -

Implementing a FunctionPass is usually straightforward (See the Hello World pass for example). FunctionPass's -may overload three virtual methods to do their work. All of these methods -should return true if they modified the program, or false if they didn't.

- - -

- - The doInitialization(Module &) method - -

- -
- -
-virtual bool doInitialization(Module &M);
-
- -

The doInitialization method is allowed to do most of the things that -FunctionPass's are not allowed to do. It can add and remove -functions, get pointers to functions, etc. The doInitialization method -is designed to do simple initialization type of stuff that does not depend on -the functions being processed. The doInitialization method call is not -scheduled to overlap with any other pass executions (thus it should be very -fast).

- -

A good example of how this method should be used is the LowerAllocations -pass. This pass converts malloc and free instructions into -platform dependent malloc() and free() function calls. It -uses the doInitialization method to get a reference to the malloc and -free functions that it needs, adding prototypes to the module if necessary.

- -
- - -

- The runOnFunction method -

- -
- -
-virtual bool runOnFunction(Function &F) = 0;
-

- -

The runOnFunction method must be implemented by your subclass to do -the transformation or analysis work of your pass. As usual, a true value should -be returned if the function is modified.

- -
- - -

- - The doFinalization(Module &) method - -

- -
- -
-virtual bool doFinalization(Module &M);
-
- -

The doFinalization method is an infrequently used method that is -called when the pass framework has finished calling runOnFunction for every function in the -program being compiled.

- -
- -
- - -

- The LoopPass class -

- -
- -

All LoopPass execute on each loop in the function independent of -all of the other loops in the function. LoopPass processes loops in -loop nest order such that outer most loop is processed last.

- -

LoopPass subclasses are allowed to update loop nest using -LPPassManager interface. Implementing a loop pass is usually -straightforward. LoopPass's may overload three virtual methods to -do their work. All these methods should return true if they modified the -program, or false if they didn't.

- - -

- - The doInitialization(Loop *,LPPassManager &) method - -

- -
- -
-virtual bool doInitialization(Loop *, LPPassManager &LPM);
-
- -

The doInitialization method is designed to do simple initialization -type of stuff that does not depend on the functions being processed. The -doInitialization method call is not scheduled to overlap with any -other pass executions (thus it should be very fast). LPPassManager -interface should be used to access Function or Module level analysis -information.

- -
- - - -

- The runOnLoop method -

- -
- -
-virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0;
-

- -

The runOnLoop method must be implemented by your subclass to do -the transformation or analysis work of your pass. As usual, a true value should -be returned if the function is modified. LPPassManager interface -should be used to update loop nest.

- -
- - -

- The doFinalization() method -

- -
- -
-virtual bool doFinalization();
-
- -

The doFinalization method is an infrequently used method that is -called when the pass framework has finished calling runOnLoop for every loop in the -program being compiled.

- -
- -
- - -

- The RegionPass class -

- -
- -

RegionPass is similar to LoopPass, -but executes on each single entry single exit region in the function. -RegionPass processes regions in nested order such that the outer most -region is processed last.

- -

RegionPass subclasses are allowed to update the region tree by using -the RGPassManager interface. You may overload three virtual methods of -RegionPass to implement your own region pass. All these -methods should return true if they modified the program, or false if they did not. -

- - -

- - The doInitialization(Region *, RGPassManager &) method - -

- -
- -
-virtual bool doInitialization(Region *, RGPassManager &RGM);
-
- -

The doInitialization method is designed to do simple initialization -type of stuff that does not depend on the functions being processed. The -doInitialization method call is not scheduled to overlap with any -other pass executions (thus it should be very fast). RGPassManager -interface should be used to access Function or Module level analysis -information.

- -
- - - -

- The runOnRegion method -

- -
- -
-virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0;
-

- -

The runOnRegion method must be implemented by your subclass to do -the transformation or analysis work of your pass. As usual, a true value should -be returned if the region is modified. RGPassManager interface -should be used to update region tree.

- -
- - -

- The doFinalization() method -

- -
- -
-virtual bool doFinalization();
-
- -

The doFinalization method is an infrequently used method that is -called when the pass framework has finished calling runOnRegion for every region in the -program being compiled.

- -
- -
- - -

- The BasicBlockPass class -

- -
- -

BasicBlockPass's are just like FunctionPass's, except that they must limit -their scope of inspection and modification to a single basic block at a time. -As such, they are not allowed to do any of the following:

- -
    -
  1. Modify or inspect any basic blocks outside of the current one
  2. -
  3. Maintain state across invocations of - runOnBasicBlock
  4. -
  5. Modify the control flow graph (by altering terminator instructions)
  6. -
  7. Any of the things forbidden for - FunctionPasses.
  8. -
- -

BasicBlockPasses are useful for traditional local and "peephole" -optimizations. They may override the same doInitialization(Module &) and doFinalization(Module &) methods that FunctionPass's have, but also have the following virtual methods that may also be implemented:

- - -

- - The doInitialization(Function &) method - -

- -
- -
-virtual bool doInitialization(Function &F);
-
- -

The doInitialization method is allowed to do most of the things that -BasicBlockPass's are not allowed to do, but that -FunctionPass's can. The doInitialization method is designed -to do simple initialization that does not depend on the -BasicBlocks being processed. The doInitialization method call is not -scheduled to overlap with any other pass executions (thus it should be very -fast).

- -
- - -

- The runOnBasicBlock method -

- -
- -
-virtual bool runOnBasicBlock(BasicBlock &BB) = 0;
-
- -

Override this function to do the work of the BasicBlockPass. This -function is not allowed to inspect or modify basic blocks other than the -parameter, and is not allowed to modify the CFG. A true value must be returned -if the basic block is modified.

- -
- - -

- - The doFinalization(Function &) method - -

- -
- -
-virtual bool doFinalization(Function &F);
-
- -

The doFinalization method is an infrequently used method that is -called when the pass framework has finished calling runOnBasicBlock for every BasicBlock in the -program being compiled. This can be used to perform per-function -finalization.

- -
- -
- - -

- The MachineFunctionPass class -

- -
- -

A MachineFunctionPass is a part of the LLVM code generator that -executes on the machine-dependent representation of each LLVM function in the -program.

- -

Code generator passes are registered and initialized specially by -TargetMachine::addPassesToEmitFile and similar routines, so they -cannot generally be run from the opt or bugpoint -commands.

- -

A MachineFunctionPass is also a FunctionPass, so all -the restrictions that apply to a FunctionPass also apply to it. -MachineFunctionPasses also have additional restrictions. In particular, -MachineFunctionPasses are not allowed to do any of the following:

- -
    -
  1. Modify or create any LLVM IR Instructions, BasicBlocks, Arguments, - Functions, GlobalVariables, GlobalAliases, or Modules.
  2. -
  3. Modify a MachineFunction other than the one currently being processed.
  4. -
  5. Maintain state across invocations of runOnMachineFunction (including global -data)
  6. -
- - -

- - The runOnMachineFunction(MachineFunction &MF) method - -

- -
- -
-virtual bool runOnMachineFunction(MachineFunction &MF) = 0;
-
- -

runOnMachineFunction can be considered the main entry point of a -MachineFunctionPass; that is, you should override this method to do the -work of your MachineFunctionPass.

- -

The runOnMachineFunction method is called on every -MachineFunction in a Module, so that the -MachineFunctionPass may perform optimizations on the machine-dependent -representation of the function. If you want to get at the LLVM Function -for the MachineFunction you're working on, use -MachineFunction's getFunction() accessor method -- but -remember, you may not modify the LLVM Function or its contents from a -MachineFunctionPass.

- -
- -
- -
- - -

- Pass registration -

- - -
- -

In the Hello World example pass we illustrated how -pass registration works, and discussed some of the reasons that it is used and -what it does. Here we discuss how and why passes are registered.

- -

As we saw above, passes are registered with the RegisterPass -template. The template parameter is the pass class itself. The first argument is the name of the pass that is to be used on -the command line to specify that the pass should be added to a program (for -example, with opt or bugpoint). The second argument is the -descriptive name of the pass, which is to be used for the -help output of -programs, as -well as for debug output generated by the --debug-pass option.

- -

If you want your pass to be easily dumpable, you should -implement the virtual print method:

- - -

- The print method -

- -
- -
-virtual void print(std::ostream &O, const Module *M) const;
-
- -

The print method must be implemented by "analyses" in order to print -a human readable version of the analysis results. This is useful for debugging -an analysis itself, as well as for other people to figure out how an analysis -works. Use the opt -analyze argument to invoke this method.

- -

The std::ostream parameter specifies the stream to write the results on, -and the Module parameter gives a pointer to the top level module of the -program that has been analyzed. Note however that this pointer may be null in -certain circumstances (such as calling the Pass::dump() from a -debugger), so it should only be used to enhance debug output, it should not be -depended on.

- -
- -
- - -

- Specifying interactions between passes -

- - -
- -

One of the main responsibilities of the PassManager is to make sure -that passes interact with each other correctly. Because PassManager -tries to optimize the execution of passes it must -know how the passes interact with each other and what dependencies exist between -the various passes. To track this, each pass can declare the set of passes that -are required to be executed before the current pass, and the passes which are -invalidated by the current pass.

- -

Typically this functionality is used to require that analysis results are -computed before your pass is run. Running arbitrary transformation passes can -invalidate the computed analysis results, which is what the invalidation set -specifies. If a pass does not implement the getAnalysisUsage method, it defaults to not -having any prerequisite passes, and invalidating all other passes.

- - -

- The getAnalysisUsage method -

- -
- -
-virtual void getAnalysisUsage(AnalysisUsage &Info) const;
-
- -

By implementing the getAnalysisUsage method, the required and -invalidated sets may be specified for your transformation. The implementation -should fill in the AnalysisUsage -object with information about which passes are required and not invalidated. To -do this, a pass may call any of the following methods on the AnalysisUsage -object:

-
- - -

- - The AnalysisUsage::addRequired<> - and AnalysisUsage::addRequiredTransitive<> methods - -

- -
-

-If your pass requires a previous pass to be executed (an analysis for example), -it can use one of these methods to arrange for it to be run before your pass. -LLVM has many different types of analyses and passes that can be required, -spanning the range from DominatorSet to BreakCriticalEdges. -Requiring BreakCriticalEdges, for example, guarantees that there will -be no critical edges in the CFG when your pass has been run. -

- -

-Some analyses chain to other analyses to do their job. For example, an AliasAnalysis implementation is required to chain to other alias analysis passes. In -cases where analyses chain, the addRequiredTransitive method should be -used instead of the addRequired method. This informs the PassManager -that the transitively required pass should be alive as long as the requiring -pass is. -

-
- - -

- - The AnalysisUsage::addPreserved<> method - -

- -
-

-One of the jobs of the PassManager is to optimize how and when analyses are run. -In particular, it attempts to avoid recomputing data unless it needs to. For -this reason, passes are allowed to declare that they preserve (i.e., they don't -invalidate) an existing analysis if it's available. For example, a simple -constant folding pass would not modify the CFG, so it can't possibly affect the -results of dominator analysis. By default, all passes are assumed to invalidate -all others. -

- -

-The AnalysisUsage class provides several methods which are useful in -certain circumstances that are related to addPreserved. In particular, -the setPreservesAll method can be called to indicate that the pass does -not modify the LLVM program at all (which is true for analyses), and the -setPreservesCFG method can be used by transformations that change -instructions in the program but do not modify the CFG or terminator instructions -(note that this property is implicitly set for BasicBlockPass's). -

- -

-addPreserved is particularly useful for transformations like -BreakCriticalEdges. This pass knows how to update a small set of loop -and dominator related analyses if they exist, so it can preserve them, despite -the fact that it hacks on the CFG. -

-
- - -

- - Example implementations of getAnalysisUsage - -

- -
- -
-// This example modifies the program, but does not modify the CFG
-void LICM::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-  AU.addRequired<LoopInfo>();
-}
-
- -
- - -

- - The getAnalysis<> and - getAnalysisIfAvailable<> methods - -

- -
- -

The Pass::getAnalysis<> method is automatically inherited by -your class, providing you with access to the passes that you declared that you -required with the getAnalysisUsage -method. It takes a single template argument that specifies which pass class you -want, and returns a reference to that pass. For example:

- -
-bool LICM::runOnFunction(Function &F) {
-  LoopInfo &LI = getAnalysis<LoopInfo>();
-  ...
-}
-
- -

This method call returns a reference to the pass desired. You may get a -runtime assertion failure if you attempt to get an analysis that you did not -declare as required in your getAnalysisUsage implementation. This -method can be called by your run* method implementation, or by any -other local method invoked by your run* method. - -A module level pass can use function level analysis info using this interface. -For example:

- -
-bool ModuleLevelPass::runOnModule(Module &M) {
-  ...
-  DominatorTree &DT = getAnalysis<DominatorTree>(Func);
-  ...
-}
-
- -

In the above example, runOnFunction for DominatorTree is called by the pass manager -before returning a reference to the desired pass.

- -

-If your pass is capable of updating analyses if they exist (e.g., -BreakCriticalEdges, as described above), you can use the -getAnalysisIfAvailable method, which returns a pointer to the analysis -if it is active. For example:

- -
-...
-if (DominatorSet *DS = getAnalysisIfAvailable<DominatorSet>()) {
-  // A DominatorSet is active.  This code will update it.
-}
-...
-
- -
- -
- - -

- Implementing Analysis Groups -

- - -
- -

Now that we understand the basics of how passes are defined, how they are -used, and how they are required from other passes, it's time to get a little bit -fancier. All of the pass relationships that we have seen so far are very -simple: one pass depends on one other specific pass to be run before it can run. -For many applications, this is great, for others, more flexibility is -required.

- -

In particular, some analyses are defined such that there is a single simple -interface to the analysis results, but multiple ways of calculating them. -Consider alias analysis for example. The most trivial alias analysis returns -"may alias" for any alias query. The most sophisticated analysis is a -flow-sensitive, context-sensitive interprocedural analysis that can take a -significant amount of time to execute (and obviously, there is a lot of room -between these two extremes for other implementations). To cleanly support -situations like this, the LLVM Pass Infrastructure supports the notion of -Analysis Groups.

- - -

- Analysis Group Concepts -

- -
- -

An Analysis Group is a single simple interface that may be implemented by -multiple different passes. Analysis Groups can be given human readable names -just like passes, but unlike passes, they need not derive from the Pass -class. An analysis group may have one or more implementations, one of which is -the "default" implementation.

- -

Analysis groups are used by client passes just like other passes are: the -AnalysisUsage::addRequired() and Pass::getAnalysis() methods. -In order to resolve this requirement, the PassManager -scans the available passes to see if any implementations of the analysis group -are available. If none is available, the default implementation is created for -the pass to use. All standard rules for interaction -between passes still apply.

- -

Although Pass Registration is optional for normal -passes, all analysis group implementations must be registered, and must use the -INITIALIZE_AG_PASS template to join the -implementation pool. Also, a default implementation of the interface -must be registered with RegisterAnalysisGroup.

- -

As a concrete example of an Analysis Group in action, consider the AliasAnalysis -analysis group. The default implementation of the alias analysis interface (the -basicaa -pass) just does a few simple checks that don't require significant analysis to -compute (such as: two different globals can never alias each other, etc). -Passes that use the AliasAnalysis -interface (for example the gcse pass), do -not care which implementation of alias analysis is actually provided, they just -use the designated interface.

- -

From the user's perspective, commands work just like normal. Issuing the -command 'opt -gcse ...' will cause the basicaa class to be -instantiated and added to the pass sequence. Issuing the command 'opt --somefancyaa -gcse ...' will cause the gcse pass to use the -somefancyaa alias analysis (which doesn't actually exist, it's just a -hypothetical example) instead.

- -
- - -

- Using RegisterAnalysisGroup -

- -
- -

The RegisterAnalysisGroup template is used to register the analysis -group itself, while the INITIALIZE_AG_PASS is used to add pass -implementations to the analysis group. First, -an analysis group should be registered, with a human readable name -provided for it. -Unlike registration of passes, there is no command line argument to be specified -for the Analysis Group Interface itself, because it is "abstract":

- -
-static RegisterAnalysisGroup<AliasAnalysis> A("Alias Analysis");
-
- -

Once the analysis is registered, passes can declare that they are valid -implementations of the interface by using the following code:

- -
-namespace {
-  // Declare that we implement the AliasAnalysis interface
-  INITIALIZE_AG_PASS(FancyAA, AliasAnalysis, "somefancyaa",
-                     "A more complex alias analysis implementation",
-                     false,  // Is CFG Only?
-                     true,   // Is Analysis?
-                     false); // Is default Analysis Group implementation?
-}
-
- -

This just shows a class FancyAA that -uses the INITIALIZE_AG_PASS macro both to register and -to "join" the AliasAnalysis -analysis group. Every implementation of an analysis group should join using -this macro.

- -
-namespace {
-  // Declare that we implement the AliasAnalysis interface
-  INITIALIZE_AG_PASS(BasicAA, AliasAnalysis, "basicaa",
-                     "Basic Alias Analysis (default AA impl)",
-                     false, // Is CFG Only?
-                     true,  // Is Analysis?
-                     true); // Is default Analysis Group implementation?
-}
-
- -

Here we show how the default implementation is specified (using the final -argument to the INITIALIZE_AG_PASS template). There must be exactly -one default implementation available at all times for an Analysis Group to be -used. Only the default implementation can derive from ImmutablePass. -Here we declare that the - BasicAliasAnalysis -pass is the default implementation for the interface.

- -
- -
- - -

- Pass Statistics -

- - -
-

The Statistic -class is designed to be an easy way to expose various success -metrics from passes. These statistics are printed at the end of a -run, when the -stats command line option is enabled on the command -line. See the Statistics section in the Programmer's Manual for details. - -

- - - -

- What PassManager does -

- - -
- -

The PassManager -class -takes a list of passes, ensures their prerequisites -are set up correctly, and then schedules passes to run efficiently. All of the -LLVM tools that run passes use the PassManager for execution of these -passes.

- -

The PassManager does two main things to try to reduce the execution -time of a series of passes:

- -
    -
  1. Share analysis results - The PassManager attempts to avoid -recomputing analysis results as much as possible. This means keeping track of -which analyses are available already, which analyses get invalidated, and which -analyses need to be run for a pass. An important part of this work is that the -PassManager tracks the exact lifetime of all analysis results, allowing -it to free memory allocated to holding analysis -results as soon as they are no longer needed.
  2. - -
  3. Pipeline the execution of passes on the program - The -PassManager attempts to get better cache and memory usage behavior out -of a series of passes by pipelining the passes together. This means that, given -a series of consecutive FunctionPass's, it -will execute all of the FunctionPass's on -the first function, then all of the FunctionPasses on the second function, -etc... until the entire program has been run through the passes. - -

    This improves the cache behavior of the compiler, because it is only touching -the LLVM program representation for a single function at a time, instead of -traversing the entire program. It reduces the memory consumption of the compiler, -because, for example, only one DominatorSet -needs to be calculated at a time. This also makes it possible to implement -some interesting enhancements in the future.

  4. - -
- -

The effectiveness of the PassManager is influenced directly by how -much information it has about the behaviors of the passes it is scheduling. For -example, the "preserved" set is intentionally conservative in the face of an -unimplemented getAnalysisUsage method. -Not implementing when it should be implemented will have the effect of not -allowing any analysis results to live across the execution of your pass.

- -

The PassManager class exposes a --debug-pass command line -option that is useful for debugging pass execution, seeing how things work, and -diagnosing when you should be preserving more analyses than you currently are -(To get information about all of the variants of the --debug-pass -option, just type 'opt -help-hidden').

- -

By using the --debug-pass=Structure option, for example, we can see -how our Hello World pass interacts with other passes. -Let's try it out with the gcse and licm passes:

- -
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null
-Module Pass Manager
-  Function Pass Manager
-    Dominator Set Construction
-    Immediate Dominators Construction
-    Global Common Subexpression Elimination
---  Immediate Dominators Construction
---  Global Common Subexpression Elimination
-    Natural Loop Construction
-    Loop Invariant Code Motion
---  Natural Loop Construction
---  Loop Invariant Code Motion
-    Module Verifier
---  Dominator Set Construction
---  Module Verifier
-  Bitcode Writer
---Bitcode Writer
-
- -

This output shows us when passes are constructed and when the analysis -results are known to be dead (prefixed with '--'). Here we see that -GCSE uses dominator and immediate dominator information to do its job. The LICM -pass uses natural loop information, which uses dominator sets, but not immediate -dominators. Because immediate dominators are no longer useful after the GCSE -pass, it is immediately destroyed. The dominator sets are then reused to -compute natural loop information, which is then used by the LICM pass.

- -

After the LICM pass, the module verifier runs (which is automatically added -by the 'opt' tool), which uses the dominator set to check that the -resultant LLVM code is well formed. After it finishes, the dominator set -information is destroyed, after being computed once, and shared by three -passes.

- -

Let's see how this changes when we run the Hello -World pass in between the two passes:

- -
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
-Module Pass Manager
-  Function Pass Manager
-    Dominator Set Construction
-    Immediate Dominators Construction
-    Global Common Subexpression Elimination
---  Dominator Set Construction
---  Immediate Dominators Construction
---  Global Common Subexpression Elimination
-    Hello World Pass
---  Hello World Pass
-    Dominator Set Construction
-    Natural Loop Construction
-    Loop Invariant Code Motion
---  Natural Loop Construction
---  Loop Invariant Code Motion
-    Module Verifier
---  Dominator Set Construction
---  Module Verifier
-  Bitcode Writer
---Bitcode Writer
-Hello: __main
-Hello: puts
-Hello: main
-
- -

Here we see that the Hello World pass has killed the -Dominator Set pass, even though it doesn't modify the code at all! To fix this, -we need to add the following getAnalysisUsage method to our pass:

- -
-// We don't modify the program, so we preserve all analyses
-virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-}
-
- -

Now when we run our pass, we get this output:

- -
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
-Pass Arguments:  -gcse -hello -licm
-Module Pass Manager
-  Function Pass Manager
-    Dominator Set Construction
-    Immediate Dominators Construction
-    Global Common Subexpression Elimination
---  Immediate Dominators Construction
---  Global Common Subexpression Elimination
-    Hello World Pass
---  Hello World Pass
-    Natural Loop Construction
-    Loop Invariant Code Motion
---  Loop Invariant Code Motion
---  Natural Loop Construction
-    Module Verifier
---  Dominator Set Construction
---  Module Verifier
-  Bitcode Writer
---Bitcode Writer
-Hello: __main
-Hello: puts
-Hello: main
-
- -

Which shows that we don't accidentally invalidate dominator information -anymore, and therefore do not have to compute it twice.

- - -

- The releaseMemory method -

- -
- -
-  virtual void releaseMemory();
-
- -

The PassManager automatically determines when to compute analysis -results, and how long to keep them around for. Because the lifetime of the pass -object itself is effectively the entire duration of the compilation process, we -need some way to free analysis results when they are no longer useful. The -releaseMemory virtual method is the way to do this.

- -

If you are writing an analysis or any other pass that retains a significant -amount of state (for use by another pass which "requires" your pass and uses the -getAnalysis method) you should implement -releaseMemory to, well, release the memory allocated to maintain this -internal state. This method is called after the run* method for the -class, before the next call of run* in your pass.

- -
- -
- - -

- Registering dynamically loaded passes -

- - -
- -

Size matters when constructing production quality tools using llvm, -both for the purposes of distribution, and for regulating the resident code size -when running on the target system. Therefore, it becomes desirable to -selectively use some passes, while omitting others and maintain the flexibility -to change configurations later on. You want to be able to do all this, and, -provide feedback to the user. This is where pass registration comes into -play.

- -

The fundamental mechanisms for pass registration are the -MachinePassRegistry class and subclasses of -MachinePassRegistryNode.

- -

An instance of MachinePassRegistry is used to maintain a list of -MachinePassRegistryNode objects. This instance maintains the list and -communicates additions and deletions to the command line interface.

- -

An instance of MachinePassRegistryNode subclass is used to maintain -information provided about a particular pass. This information includes the -command line name, the command help string and the address of the function used -to create an instance of the pass. A global static constructor of one of these -instances registers with a corresponding MachinePassRegistry, -the static destructor unregisters. Thus a pass that is statically linked -in the tool will be registered at start up. A dynamically loaded pass will -register on load and unregister at unload.

- - -

- Using existing registries -

- -
- -

There are predefined registries to track instruction scheduling -(RegisterScheduler) and register allocation (RegisterRegAlloc) -machine passes. Here we will describe how to register a register -allocator machine pass.

- -

Implement your register allocator machine pass. In your register allocator -.cpp file add the following include:

- -
-#include "llvm/CodeGen/RegAllocRegistry.h"
-
- -

Also in your register allocator .cpp file, define a creator function in the -form:

- -
-FunctionPass *createMyRegisterAllocator() {
-  return new MyRegisterAllocator();
-}
-
- -

Note that the signature of this function should match the type of -RegisterRegAlloc::FunctionPassCtor. In the same file add the -"installing" declaration, in the form:

- -
-static RegisterRegAlloc myRegAlloc("myregalloc",
-                                   "my register allocator help string",
-                                   createMyRegisterAllocator);
-
- -

Note that the two spaces prior to the help string produce a tidy result on the --help query.

- -
-$ llc -help
-  ...
-  -regalloc                    - Register allocator to use (default=linearscan)
-    =linearscan                -   linear scan register allocator
-    =local                     -   local register allocator
-    =simple                    -   simple register allocator
-    =myregalloc                -   my register allocator help string
-  ...
-
- -

And that's it. The user is now free to use -regalloc=myregalloc as -an option. Registering instruction schedulers is similar except use the -RegisterScheduler class. Note that the -RegisterScheduler::FunctionPassCtor is significantly different from -RegisterRegAlloc::FunctionPassCtor.

- -

To force the load/linking of your register allocator into the llc/lli tools, -add your creator function's global declaration to "Passes.h" and add a "pseudo" -call line to llvm/Codegen/LinkAllCodegenComponents.h.

- -
- - - -

- Creating new registries -

- -
- -

The easiest way to get started is to clone one of the existing registries; we -recommend llvm/CodeGen/RegAllocRegistry.h. The key things to modify -are the class name and the FunctionPassCtor type.

- -

Then you need to declare the registry. Example: if your pass registry is -RegisterMyPasses then define:

- -
-MachinePassRegistry RegisterMyPasses::Registry;
-
- -

And finally, declare the command line option for your passes. Example:

- -
-cl::opt<RegisterMyPasses::FunctionPassCtor, false,
-        RegisterPassParser<RegisterMyPasses> >
-MyPassOpt("mypass",
-          cl::init(&createDefaultMyPass),
-          cl::desc("my pass option help")); 
-
- -

Here the command option is "mypass", with createDefaultMyPass as the default -creator.

- -
- -
- - -

- Using GDB with dynamically loaded passes -

- - -
- -

Unfortunately, using GDB with dynamically loaded passes is not as easy as it -should be. First of all, you can't set a breakpoint in a shared object that has -not been loaded yet, and second of all there are problems with inlined functions -in shared objects. Here are some suggestions for debugging your pass with -GDB.

- -

For the sake of discussion, I'm going to assume that you are debugging a -transformation invoked by opt, although nothing described here depends -on that.

- - -

- Setting a breakpoint in your pass -

- -
- -

First thing you do is start gdb on the opt process:

- -
-$ gdb opt
-GNU gdb 5.0
-Copyright 2000 Free Software Foundation, Inc.
-GDB is free software, covered by the GNU General Public License, and you are
-welcome to change it and/or distribute copies of it under certain conditions.
-Type "show copying" to see the conditions.
-There is absolutely no warranty for GDB.  Type "show warranty" for details.
-This GDB was configured as "sparc-sun-solaris2.6"...
-(gdb)
-
- -

Note that opt has a lot of debugging information in it, so it takes -time to load. Be patient. Since we cannot set a breakpoint in our pass yet -(the shared object isn't loaded until runtime), we must execute the process, and -have it stop before it invokes our pass, but after it has loaded the shared -object. The most foolproof way of doing this is to set a breakpoint in -PassManager::run and then run the process with the arguments you -want:

- -
-(gdb) break llvm::PassManager::run
-Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70.
-(gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
-Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
-Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70
-70      bool PassManager::run(Module &M) { return PM->run(M); }
-(gdb)
-
- -

Once the opt stops in the PassManager::run method you are -now free to set breakpoints in your pass so that you can trace through execution -or do other standard debugging stuff.

- -
- - -

- Miscellaneous Problems -

- -
- -

Once you have the basics down, there are a couple of problems that GDB has, -some with solutions, some without.

- -
    -
  • Inline functions have bogus stack information. In general, GDB does a -pretty good job getting stack traces and stepping through inline functions. -When a pass is dynamically loaded however, it somehow completely loses this -capability. The only solution I know of is to de-inline a function (move it -from the body of a class to a .cpp file).
  • - -
  • Restarting the program breaks breakpoints. After following the information -above, you have succeeded in getting some breakpoints planted in your pass. Next -thing you know, you restart the program (i.e., you type 'run' again), -and you start getting errors about breakpoints being unsettable. The only way I -have found to "fix" this problem is to delete the breakpoints that are -already set in your pass, run the program, and re-set the breakpoints once -execution stops in PassManager::run.
  • - -
- -

Hopefully these tips will help with common case debugging situations. If -you'd like to contribute some tips of your own, just contact Chris.

- -
- -
- - -

- Future extensions planned -

- - -
- -

Although the LLVM Pass Infrastructure is very capable as it stands, and does -some nifty stuff, there are things we'd like to add in the future. Here is -where we are going:

- - -

- Multithreaded LLVM -

- -
- -

Multiple CPU machines are becoming more common and compilation can never be -fast enough: obviously we should allow for a multithreaded compiler. Because of -the semantics defined for passes above (specifically they cannot maintain state -across invocations of their run* methods), a nice clean way to -implement a multithreaded compiler would be for the PassManager class -to create multiple instances of each pass object, and allow the separate -instances to be hacking on different parts of the program at the same time.

- -

This implementation would prevent each of the passes from having to implement -multithreaded constructs, requiring only the LLVM core to have locking in a few -places (for global resources). Although this is a simple extension, we simply -haven't had time (or multiprocessor machines, thus a reason) to implement this. -Despite that, we have kept the LLVM passes SMP ready, and you should too.

- -
- -
- - -
-
- Valid CSS - Valid HTML 4.01 - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-04-19 22:20:34 +0200 (Thu, 19 Apr 2012) $ -
- - - diff --git a/docs/WritingAnLLVMPass.rst b/docs/WritingAnLLVMPass.rst new file mode 100644 index 000000000000..b10d98f87e2b --- /dev/null +++ b/docs/WritingAnLLVMPass.rst @@ -0,0 +1,1436 @@ +==================== +Writing an LLVM Pass +==================== + +.. contents:: + :local: + +Introduction --- What is a pass? +================================ + +The LLVM Pass Framework is an important part of the LLVM system, because LLVM +passes are where most of the interesting parts of the compiler exist. Passes +perform the transformations and optimizations that make up the compiler, they +build the analysis results that are used by these transformations, and they +are, above all, a structuring technique for compiler code. + +All LLVM passes are subclasses of the `Pass +`_ class, which implement +functionality by overriding virtual methods inherited from ``Pass``. Depending +on how your pass works, you should inherit from the :ref:`ModulePass +` , :ref:`CallGraphSCCPass +`, :ref:`FunctionPass +` , or :ref:`LoopPass +`, or :ref:`RegionPass +`, or :ref:`BasicBlockPass +` classes, which gives the system more +information about what your pass does, and how it can be combined with other +passes. One of the main features of the LLVM Pass Framework is that it +schedules passes to run in an efficient way based on the constraints that your +pass meets (which are indicated by which class they derive from). + +We start by showing you how to construct a pass, everything from setting up the +code, to compiling, loading, and executing it. After the basics are down, more +advanced features are discussed. + +Quick Start --- Writing hello world +=================================== + +Here we describe how to write the "hello world" of passes. The "Hello" pass is +designed to simply print out the name of non-external functions that exist in +the program being compiled. It does not modify the program at all, it just +inspects it. 
The source code and files for this pass are available in the LLVM +source tree in the ``lib/Transforms/Hello`` directory. + +.. _writing-an-llvm-pass-makefile: + +Setting up the build environment +-------------------------------- + +.. FIXME: Why does this recommend to build in-tree? + +First, configure and build LLVM. This needs to be done directly inside the +LLVM source tree rather than in a separate objects directory. Next, you need +to create a new directory somewhere in the LLVM source base. For this example, +we'll assume that you made ``lib/Transforms/Hello``. Finally, you must set up +a build script (``Makefile``) that will compile the source code for the new +pass. To do this, copy the following into ``Makefile``: + +.. code-block:: make + + # Makefile for hello pass + + # Path to top level of LLVM hierarchy + LEVEL = ../../.. + + # Name of the library to build + LIBRARYNAME = Hello + + # Make the shared library become a loadable module so the tools can + # dlopen/dlsym on the resulting library. + LOADABLE_MODULE = 1 + + # Include the makefile implementation stuff + include $(LEVEL)/Makefile.common + +This makefile specifies that all of the ``.cpp`` files in the current directory +are to be compiled and linked together into a shared object +``$(LEVEL)/Debug+Asserts/lib/Hello.so`` that can be dynamically loaded by the +:program:`opt` or :program:`bugpoint` tools via their :option:`-load` options. +If your operating system uses a suffix other than ``.so`` (such as Windows or Mac +OS X), the appropriate extension will be used. + +If you are using CMake to build LLVM, see :ref:`cmake-out-of-source-pass`. + +Now that we have the build scripts set up, we just need to write the code for +the pass itself. + +.. _writing-an-llvm-pass-basiccode: + +Basic code required +------------------- + +Now that we have a way to compile our new pass, we just have to write it. +Start out with: + +.. 
code-block:: c++ + + #include "llvm/Pass.h" + #include "llvm/Function.h" + #include "llvm/Support/raw_ostream.h" + +Which are needed because we are writing a `Pass +`_, we are operating on +`Function `_\ s, and we will +be doing some printing. + +Next we have: + +.. code-block:: c++ + + using namespace llvm; + +... which is required because the functions from the include files live in the +llvm namespace. + +Next we have: + +.. code-block:: c++ + + namespace { + +... which starts out an anonymous namespace. Anonymous namespaces are to C++ +what the "``static``" keyword is to C (at global scope). It makes the things +declared inside of the anonymous namespace visible only to the current file. +If you're not familiar with them, consult a decent C++ book for more +information. + +Next, we declare our pass itself: + +.. code-block:: c++ + + struct Hello : public FunctionPass { + +This declares a "``Hello``" class that is a subclass of `FunctionPass +`. The different builtin pass subclasses +are described in detail :ref:`later `, but +for now, know that ``FunctionPass`` operates on a function at a time. + +.. code-block:: c++ + + static char ID; + Hello() : FunctionPass(ID) {} + +This declares pass identifier used by LLVM to identify pass. This allows LLVM +to avoid using expensive C++ runtime information. + +.. code-block:: c++ + + virtual bool runOnFunction(Function &F) { + errs() << "Hello: "; + errs().write_escaped(F.getName()) << "\n"; + return false; + } + }; // end of struct Hello + } // end of anonymous namespace + +We declare a :ref:`runOnFunction ` method, +which overrides an abstract virtual method inherited from :ref:`FunctionPass +`. This is where we are supposed to do our +thing, so we just print out our message with the name of each function. + +.. code-block:: c++ + + char Hello::ID = 0; + +We initialize pass ID here. LLVM uses ID's address to identify a pass, so +initialization value is not important. + +.. 
code-block:: c++ + + static RegisterPass X("hello", "Hello World Pass", + false /* Only looks at CFG */, + false /* Analysis Pass */); + +Lastly, we :ref:`register our class ` +``Hello``, giving it a command line argument "``hello``", and a name "Hello +World Pass". The last two arguments describe its behavior: if a pass walks CFG +without modifying it then the third argument is set to ``true``; if a pass is +an analysis pass, for example dominator tree pass, then ``true`` is supplied as +the fourth argument. + +As a whole, the ``.cpp`` file looks like: + +.. code-block:: c++ + + #include "llvm/Pass.h" + #include "llvm/Function.h" + #include "llvm/Support/raw_ostream.h" + + using namespace llvm; + + namespace { + struct Hello : public FunctionPass { + static char ID; + Hello() : FunctionPass(ID) {} + + virtual bool runOnFunction(Function &F) { + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; + return false; + } + }; + } + + char Hello::ID = 0; + static RegisterPass X("hello", "Hello World Pass", false, false); + +Now that it's all together, compile the file with a simple "``gmake``" command +in the local directory and you should get a new file +"``Debug+Asserts/lib/Hello.so``" under the top level directory of the LLVM +source tree (not in the local directory). Note that everything in this file is +contained in an anonymous namespace --- this reflects the fact that passes +are self contained units that do not need external interfaces (although they +can have them) to be useful. + +Running a pass with ``opt`` +--------------------------- + +Now that you have a brand new shiny shared object file, we can use the +:program:`opt` command to run an LLVM program through your pass. Because you +registered your pass with ``RegisterPass``, you will be able to use the +:program:`opt` tool to access it, once loaded. + +To test it, follow the example at the end of the :doc:`GettingStarted` to +compile "Hello World" to LLVM. 
We can now run the bitcode file (hello.bc) for +the program through our transformation like this (of course, any bitcode file +will work): + +.. code-block:: console + + $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null + Hello: __main + Hello: puts + Hello: main + +The :option:`-load` option specifies that :program:`opt` should load your pass +as a shared object, which makes "``-hello``" a valid command line argument +(which is one reason you need to :ref:`register your pass +`). Because the Hello pass does not modify +the program in any interesting way, we just throw away the result of +:program:`opt` (sending it to ``/dev/null``). + +To see what happened to the other string you registered, try running +:program:`opt` with the :option:`-help` option: + +.. code-block:: console + + $ opt -load ../../../Debug+Asserts/lib/Hello.so -help + OVERVIEW: llvm .bc -> .bc modular optimizer + + USAGE: opt [options] + + OPTIONS: + Optimizations available: + ... + -globalopt - Global Variable Optimizer + -globalsmodref-aa - Simple mod/ref analysis for globals + -gvn - Global Value Numbering + -hello - Hello World Pass + -indvars - Induction Variable Simplification + -inline - Function Integration/Inlining + -insert-edge-profiling - Insert instrumentation for edge profiling + ... + +The pass name gets added as the information string for your pass, giving some +documentation to users of :program:`opt`. Now that you have a working pass, +you would go ahead and make it do the cool transformations you want. Once you +get it all working and tested, it may become useful to find out how fast your +pass is. The :ref:`PassManager ` provides a +nice command line option (:option:`--time-passes`) that allows you to get +information about the execution time of your pass along with the other passes +you queue up. For example: + +.. 
code-block:: console + + $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null + Hello: __main + Hello: puts + Hello: main + =============================================================================== + ... Pass execution timing report ... + =============================================================================== + Total Execution Time: 0.02 seconds (0.0479059 wall clock) + + ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Pass Name --- + 0.0100 (100.0%) 0.0000 ( 0.0%) 0.0100 ( 50.0%) 0.0402 ( 84.0%) Bitcode Writer + 0.0000 ( 0.0%) 0.0100 (100.0%) 0.0100 ( 50.0%) 0.0031 ( 6.4%) Dominator Set Construction + 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0013 ( 2.7%) Module Verifier + 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0033 ( 6.9%) Hello World Pass + 0.0100 (100.0%) 0.0100 (100.0%) 0.0200 (100.0%) 0.0479 (100.0%) TOTAL + +As you can see, our implementation above is pretty fast. The additional +passes listed are automatically inserted by the :program:`opt` tool to verify +that the LLVM emitted by your pass is still valid and well formed LLVM, which +hasn't been broken somehow. + +Now that you have seen the basics of the mechanics behind passes, we can talk +about some more details of how they work and how to use them. + +.. _writing-an-llvm-pass-pass-classes: + +Pass classes and requirements +============================= + +One of the first things that you should do when designing a new pass is to +decide what class you should subclass for your pass. The :ref:`Hello World +` example uses the :ref:`FunctionPass +` class for its implementation, but we did +not discuss why or when this should occur. Here we talk about the classes +available, from the most general to the most specific. + +When choosing a superclass for your ``Pass``, you should choose the **most +specific** class possible, while still being able to meet the requirements +listed. 
This gives the LLVM Pass Infrastructure information necessary to +optimize how passes are run, so that the resultant compiler isn't unnecessarily +slow. + +The ``ImmutablePass`` class +--------------------------- + +The most plain and boring type of pass is the "`ImmutablePass +`_" class. This pass +type is used for passes that do not have to be run, do not change state, and +never need to be updated. This is not a normal type of transformation or +analysis, but can provide information about the current compiler configuration. + +Although this pass class is very infrequently used, it is important for +providing information about the current target machine being compiled for, and +other static information that can affect the various transformations. + +``ImmutablePass``\ es never invalidate other transformations, are never +invalidated, and are never "run". + +.. _writing-an-llvm-pass-ModulePass: + +The ``ModulePass`` class +------------------------ + +The `ModulePass `_ class +is the most general of all superclasses that you can use. Deriving from +``ModulePass`` indicates that your pass uses the entire program as a unit, +referring to function bodies in no predictable order, or adding and removing +functions. Because nothing is known about the behavior of ``ModulePass`` +subclasses, no optimization can be done for their execution. + +A module pass can use function level passes (e.g. dominators) using the +``getAnalysis`` interface ``getAnalysis(llvm::Function *)`` to +provide the function to retrieve analysis result for, if the function pass does +not require any module or immutable passes. Note that this can only be done +for functions for which the analysis ran, e.g. in the case of dominators you +should only ask for the ``DominatorTree`` for function definitions, not +declarations. 
+ +To write a correct ``ModulePass`` subclass, derive from ``ModulePass`` and +overload the ``runOnModule`` method with the following signature: + +The ``runOnModule`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool runOnModule(Module &M) = 0; + +The ``runOnModule`` method performs the interesting work of the pass. It +should return ``true`` if the module was modified by the transformation and +``false`` otherwise. + +.. _writing-an-llvm-pass-CallGraphSCCPass: + +The ``CallGraphSCCPass`` class +------------------------------ + +The `CallGraphSCCPass +`_ is used by +passes that need to traverse the program bottom-up on the call graph (callees +before callers). Deriving from ``CallGraphSCCPass`` provides some mechanics +for building and traversing the ``CallGraph``, but also allows the system to +optimize execution of ``CallGraphSCCPass``\ es. If your pass meets the +requirements outlined below, and doesn't meet the requirements of a +:ref:`FunctionPass ` or :ref:`BasicBlockPass +`, you should derive from +``CallGraphSCCPass``. + +``TODO``: explain briefly what SCC, Tarjan's algo, and B-U mean. + +To be explicit, CallGraphSCCPass subclasses are: + +#. ... *not allowed* to inspect or modify any ``Function``\ s other than those + in the current SCC and the direct callers and direct callees of the SCC. +#. ... *required* to preserve the current ``CallGraph`` object, updating it to + reflect any changes made to the program. +#. ... *not allowed* to add or remove SCC's from the current Module, though + they may change the contents of an SCC. +#. ... *allowed* to add or remove global variables from the current Module. +#. ... *allowed* to maintain state across invocations of :ref:`runOnSCC + ` (including global data). + +Implementing a ``CallGraphSCCPass`` is slightly tricky in some cases because it +has to handle SCCs with more than one node in it. 
All of the virtual methods +described below should return ``true`` if they modified the program, or +``false`` if they didn't. + +The ``doInitialization(CallGraph &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doInitialization(CallGraph &CG); + +The ``doInitialization`` method is allowed to do most of the things that +``CallGraphSCCPass``\ es are not allowed to do. They can add and remove +functions, get pointers to functions, etc. The ``doInitialization`` method is +designed to do simple initialization type of stuff that does not depend on the +SCCs being processed. The ``doInitialization`` method call is not scheduled to +overlap with any other pass executions (thus it should be very fast). + +.. _writing-an-llvm-pass-runOnSCC: + +The ``runOnSCC`` method +^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool runOnSCC(CallGraphSCC &SCC) = 0; + +The ``runOnSCC`` method performs the interesting work of the pass, and should +return ``true`` if the module was modified by the transformation, ``false`` +otherwise. + +The ``doFinalization(CallGraph &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doFinalization(CallGraph &CG); + +The ``doFinalization`` method is an infrequently used method that is called +when the pass framework has finished calling :ref:`runOnSCC +<writing-an-llvm-pass-runOnSCC>` for every SCC in the program being +compiled. + +.. _writing-an-llvm-pass-FunctionPass: + +The ``FunctionPass`` class +-------------------------- + +In contrast to ``ModulePass`` subclasses, `FunctionPass +`_ subclasses do have a +predictable, local behavior that can be expected by the system. All +``FunctionPass`` execute on each function in the program independent of all of +the other functions in the program. ``FunctionPass``\ es do not require that +they are executed in a particular order, and ``FunctionPass``\ es do not modify +external functions. 
+ +To be explicit, ``FunctionPass`` subclasses are not allowed to: + +#. Modify a ``Function`` other than the one currently being processed. +#. Add or remove ``Function``\ s from the current ``Module``. +#. Add or remove global variables from the current ``Module``. +#. Maintain state across invocations of :ref:`runOnFunction + ` (including global data). + +Implementing a ``FunctionPass`` is usually straightforward (See the :ref:`Hello +World ` pass for example). +``FunctionPass``\ es may overload three virtual methods to do their work. All +of these methods should return ``true`` if they modified the program, or +``false`` if they didn't. + +.. _writing-an-llvm-pass-doInitialization-mod: + +The ``doInitialization(Module &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doInitialization(Module &M); + +The ``doInitialization`` method is allowed to do most of the things that +``FunctionPass``\ es are not allowed to do. They can add and remove functions, +get pointers to functions, etc. The ``doInitialization`` method is designed to +do simple initialization type of stuff that does not depend on the functions +being processed. The ``doInitialization`` method call is not scheduled to +overlap with any other pass executions (thus it should be very fast). + +A good example of how this method should be used is the `LowerAllocations +`_ pass. This pass +converts ``malloc`` and ``free`` instructions into platform dependent +``malloc()`` and ``free()`` function calls. It uses the ``doInitialization`` +method to get a reference to the ``malloc`` and ``free`` functions that it +needs, adding prototypes to the module if necessary. + +.. _writing-an-llvm-pass-runOnFunction: + +The ``runOnFunction`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool runOnFunction(Function &F) = 0; + +The ``runOnFunction`` method must be implemented by your subclass to do the +transformation or analysis work of your pass. 
As usual, a ``true`` value +should be returned if the function is modified. + +.. _writing-an-llvm-pass-doFinalization-mod: + +The ``doFinalization(Module &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doFinalization(Module &M); + +The ``doFinalization`` method is an infrequently used method that is called +when the pass framework has finished calling :ref:`runOnFunction +` for every function in the program being +compiled. + +.. _writing-an-llvm-pass-LoopPass: + +The ``LoopPass`` class +---------------------- + +All ``LoopPass`` execute on each loop in the function independent of all of the +other loops in the function. ``LoopPass`` processes loops in loop nest order +such that outer most loop is processed last. + +``LoopPass`` subclasses are allowed to update loop nest using ``LPPassManager`` +interface. Implementing a loop pass is usually straightforward. +``LoopPass``\ es may overload three virtual methods to do their work. All +these methods should return ``true`` if they modified the program, or ``false`` +if they didn't. + +The ``doInitialization(Loop *, LPPassManager &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doInitialization(Loop *, LPPassManager &LPM); + +The ``doInitialization`` method is designed to do simple initialization type of +stuff that does not depend on the functions being processed. The +``doInitialization`` method call is not scheduled to overlap with any other +pass executions (thus it should be very fast). ``LPPassManager`` interface +should be used to access ``Function`` or ``Module`` level analysis information. + +.. _writing-an-llvm-pass-runOnLoop: + +The ``runOnLoop`` method +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0; + +The ``runOnLoop`` method must be implemented by your subclass to do the +transformation or analysis work of your pass. 
As usual, a ``true`` value +should be returned if the function is modified. ``LPPassManager`` interface +should be used to update loop nest. + +The ``doFinalization()`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doFinalization(); + +The ``doFinalization`` method is an infrequently used method that is called +when the pass framework has finished calling :ref:`runOnLoop +` for every loop in the program being compiled. + +.. _writing-an-llvm-pass-RegionPass: + +The ``RegionPass`` class +------------------------ + +``RegionPass`` is similar to :ref:`LoopPass `, +but executes on each single entry single exit region in the function. +``RegionPass`` processes regions in nested order such that the outer most +region is processed last. + +``RegionPass`` subclasses are allowed to update the region tree by using the +``RGPassManager`` interface. You may overload three virtual methods of +``RegionPass`` to implement your own region pass. All these methods should +return ``true`` if they modified the program, or ``false`` if they did not. + +The ``doInitialization(Region *, RGPassManager &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doInitialization(Region *, RGPassManager &RGM); + +The ``doInitialization`` method is designed to do simple initialization type of +stuff that does not depend on the functions being processed. The +``doInitialization`` method call is not scheduled to overlap with any other +pass executions (thus it should be very fast). ``RGPassManager`` interface +should be used to access ``Function`` or ``Module`` level analysis information. + +.. _writing-an-llvm-pass-runOnRegion: + +The ``runOnRegion`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0; + +The ``runOnRegion`` method must be implemented by your subclass to do the +transformation or analysis work of your pass. 
As usual, a true value should be +returned if the region is modified. ``RGPassManager`` interface should be used to +update region tree. + +The ``doFinalization()`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doFinalization(); + +The ``doFinalization`` method is an infrequently used method that is called +when the pass framework has finished calling :ref:`runOnRegion +` for every region in the program being +compiled. + +.. _writing-an-llvm-pass-BasicBlockPass: + +The ``BasicBlockPass`` class +---------------------------- + +``BasicBlockPass``\ es are just like :ref:`FunctionPass's +` , except that they must limit their scope +of inspection and modification to a single basic block at a time. As such, +they are **not** allowed to do any of the following: + +#. Modify or inspect any basic blocks outside of the current one. +#. Maintain state across invocations of :ref:`runOnBasicBlock + `. +#. Modify the control flow graph (by altering terminator instructions) +#. Any of the things forbidden for :ref:`FunctionPasses + `. + +``BasicBlockPass``\ es are useful for traditional local and "peephole" +optimizations. They may override the same :ref:`doInitialization(Module &) +` and :ref:`doFinalization(Module &) +` methods that :ref:`FunctionPass's +` have, but also have the following virtual +methods that may also be implemented: + +The ``doInitialization(Function &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doInitialization(Function &F); + +The ``doInitialization`` method is allowed to do most of the things that +``BasicBlockPass``\ es are not allowed to do, but that ``FunctionPass``\ es +can. The ``doInitialization`` method is designed to do simple initialization +that does not depend on the ``BasicBlock``\ s being processed. The +``doInitialization`` method call is not scheduled to overlap with any other +pass executions (thus it should be very fast). + +.. 
_writing-an-llvm-pass-runOnBasicBlock: + +The ``runOnBasicBlock`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool runOnBasicBlock(BasicBlock &BB) = 0; + +Override this function to do the work of the ``BasicBlockPass``. This function +is not allowed to inspect or modify basic blocks other than the parameter, and +is not allowed to modify the CFG. A ``true`` value must be returned if the +basic block is modified. + +The ``doFinalization(Function &)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool doFinalization(Function &F); + +The ``doFinalization`` method is an infrequently used method that is called +when the pass framework has finished calling :ref:`runOnBasicBlock +` for every ``BasicBlock`` in the program +being compiled. This can be used to perform per-function finalization. + +The ``MachineFunctionPass`` class +--------------------------------- + +A ``MachineFunctionPass`` is a part of the LLVM code generator that executes on +the machine-dependent representation of each LLVM function in the program. + +Code generator passes are registered and initialized specially by +``TargetMachine::addPassesToEmitFile`` and similar routines, so they cannot +generally be run from the :program:`opt` or :program:`bugpoint` commands. + +A ``MachineFunctionPass`` is also a ``FunctionPass``, so all the restrictions +that apply to a ``FunctionPass`` also apply to it. ``MachineFunctionPass``\ es +also have additional restrictions. In particular, ``MachineFunctionPass``\ es +are not allowed to do any of the following: + +#. Modify or create any LLVM IR ``Instruction``\ s, ``BasicBlock``\ s, + ``Argument``\ s, ``Function``\ s, ``GlobalVariable``\ s, + ``GlobalAlias``\ es, or ``Module``\ s. +#. Modify a ``MachineFunction`` other than the one currently being processed. +#. Maintain state across invocations of :ref:`runOnMachineFunction + ` (including global data). + +.. 
_writing-an-llvm-pass-runOnMachineFunction: + +The ``runOnMachineFunction(MachineFunction &MF)`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual bool runOnMachineFunction(MachineFunction &MF) = 0; + +``runOnMachineFunction`` can be considered the main entry point of a +``MachineFunctionPass``; that is, you should override this method to do the +work of your ``MachineFunctionPass``. + +The ``runOnMachineFunction`` method is called on every ``MachineFunction`` in a +``Module``, so that the ``MachineFunctionPass`` may perform optimizations on +the machine-dependent representation of the function. If you want to get at +the LLVM ``Function`` for the ``MachineFunction`` you're working on, use +``MachineFunction``'s ``getFunction()`` accessor method --- but remember, you +may not modify the LLVM ``Function`` or its contents from a +``MachineFunctionPass``. + +.. _writing-an-llvm-pass-registration: + +Pass registration +----------------- + +In the :ref:`Hello World ` example pass we +illustrated how pass registration works, and discussed some of the reasons that +it is used and what it does. Here we discuss how and why passes are +registered. + +As we saw above, passes are registered with the ``RegisterPass`` template. The +template parameter is the pass class being registered. The first argument is +the name that is to be used on the command line to specify that the pass should +be added to a program (for example, with :program:`opt` or +:program:`bugpoint`). The second argument is the name of the pass, which is to +be used for the :option:`-help` output of programs, as well as for debug output +generated by the :option:`--debug-pass` option. + +If you want your pass to be easily dumpable, you should implement the virtual +print method: + +The ``print`` method +^^^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: c++ + + virtual void print(llvm::raw_ostream &O, const Module *M) const; + +The ``print`` method must be implemented by "analyses" in order to print a +human readable version of the analysis results. This is useful for debugging +an analysis itself, as well as for other people to figure out how an analysis +works. Use the opt ``-analyze`` argument to invoke this method. + +The ``llvm::raw_ostream`` parameter specifies the stream to write the results +on, and the ``Module`` parameter gives a pointer to the top level module of the +program that has been analyzed. Note however that this pointer may be ``NULL`` +in certain circumstances (such as calling the ``Pass::dump()`` from a +debugger), so it should only be used to enhance debug output, it should not be +depended on. + +.. _writing-an-llvm-pass-interaction: + +Specifying interactions between passes +-------------------------------------- + +One of the main responsibilities of the ``PassManager`` is to make sure that +passes interact with each other correctly. Because ``PassManager`` tries to +:ref:`optimize the execution of passes ` it +must know how the passes interact with each other and what dependencies exist +between the various passes. To track this, each pass can declare the set of +passes that are required to be executed before the current pass, and the passes +which are invalidated by the current pass. + +Typically this functionality is used to require that analysis results are +computed before your pass is run. Running arbitrary transformation passes can +invalidate the computed analysis results, which is what the invalidation set +specifies. If a pass does not implement the :ref:`getAnalysisUsage +` method, it defaults to not having any +prerequisite passes, and invalidating **all** other passes. + +.. _writing-an-llvm-pass-getAnalysisUsage: + +The ``getAnalysisUsage`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: c++ + + virtual void getAnalysisUsage(AnalysisUsage &Info) const; + +By implementing the ``getAnalysisUsage`` method, the required and invalidated +sets may be specified for your transformation. The implementation should fill +in the `AnalysisUsage +`_ object with +information about which passes are required and not invalidated. To do this, a +pass may call any of the following methods on the ``AnalysisUsage`` object: + +The ``AnalysisUsage::addRequired<>`` and ``AnalysisUsage::addRequiredTransitive<>`` methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If your pass requires a previous pass to be executed (an analysis for example), +it can use one of these methods to arrange for it to be run before your pass. +LLVM has many different types of analyses and passes that can be required, +spanning the range from ``DominatorSet`` to ``BreakCriticalEdges``. Requiring +``BreakCriticalEdges``, for example, guarantees that there will be no critical +edges in the CFG when your pass has been run. + +Some analyses chain to other analyses to do their job. For example, an +`AliasAnalysis ` implementation is required to :ref:`chain +` to other alias analysis passes. In cases where +analyses chain, the ``addRequiredTransitive`` method should be used instead of +the ``addRequired`` method. This informs the ``PassManager`` that the +transitively required pass should be alive as long as the requiring pass is. + +The ``AnalysisUsage::addPreserved<>`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +One of the jobs of the ``PassManager`` is to optimize how and when analyses are +run. In particular, it attempts to avoid recomputing data unless it needs to. +For this reason, passes are allowed to declare that they preserve (i.e., they +don't invalidate) an existing analysis if it's available. 
For example, a +simple constant folding pass would not modify the CFG, so it can't possibly +affect the results of dominator analysis. By default, all passes are assumed +to invalidate all others. + +The ``AnalysisUsage`` class provides several methods which are useful in +certain circumstances that are related to ``addPreserved``. In particular, the +``setPreservesAll`` method can be called to indicate that the pass does not +modify the LLVM program at all (which is true for analyses), and the +``setPreservesCFG`` method can be used by transformations that change +instructions in the program but do not modify the CFG or terminator +instructions (note that this property is implicitly set for +:ref:`BasicBlockPass `\ es). + +``addPreserved`` is particularly useful for transformations like +``BreakCriticalEdges``. This pass knows how to update a small set of loop and +dominator related analyses if they exist, so it can preserve them, despite the +fact that it hacks on the CFG. + +Example implementations of ``getAnalysisUsage`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + // This example modifies the program, but does not modify the CFG + void LICM::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + } + +.. _writing-an-llvm-pass-getAnalysis: + +The ``getAnalysis<>`` and ``getAnalysisIfAvailable<>`` methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``Pass::getAnalysis<>`` method is automatically inherited by your class, +providing you with access to the passes that you declared that you required +with the :ref:`getAnalysisUsage ` +method. It takes a single template argument that specifies which pass class +you want, and returns a reference to that pass. For example: + +.. code-block:: c++ + + bool LICM::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis(); + //... + } + +This method call returns a reference to the pass desired. 
You may get a +runtime assertion failure if you attempt to get an analysis that you did not +declare as required in your :ref:`getAnalysisUsage +` implementation. This method can be +called by your ``run*`` method implementation, or by any other local method +invoked by your ``run*`` method. + +A module level pass can use function level analysis info using this interface. +For example: + +.. code-block:: c++ + + bool ModuleLevelPass::runOnModule(Module &M) { + //... + DominatorTree &DT = getAnalysis(Func); + //... + } + +In above example, ``runOnFunction`` for ``DominatorTree`` is called by pass +manager before returning a reference to the desired pass. + +If your pass is capable of updating analyses if they exist (e.g., +``BreakCriticalEdges``, as described above), you can use the +``getAnalysisIfAvailable`` method, which returns a pointer to the analysis if +it is active. For example: + +.. code-block:: c++ + + if (DominatorSet *DS = getAnalysisIfAvailable()) { + // A DominatorSet is active. This code will update it. + } + +Implementing Analysis Groups +---------------------------- + +Now that we understand the basics of how passes are defined, how they are used, +and how they are required from other passes, it's time to get a little bit +fancier. All of the pass relationships that we have seen so far are very +simple: one pass depends on one other specific pass to be run before it can +run. For many applications, this is great, for others, more flexibility is +required. + +In particular, some analyses are defined such that there is a single simple +interface to the analysis results, but multiple ways of calculating them. +Consider alias analysis for example. The most trivial alias analysis returns +"may alias" for any alias query. 
The most sophisticated analysis is a +flow-sensitive, context-sensitive interprocedural analysis that can take a +significant amount of time to execute (and obviously, there is a lot of room +between these two extremes for other implementations). To cleanly support +situations like this, the LLVM Pass Infrastructure supports the notion of +Analysis Groups. + +Analysis Group Concepts +^^^^^^^^^^^^^^^^^^^^^^^ + +An Analysis Group is a single simple interface that may be implemented by +multiple different passes. Analysis Groups can be given human readable names +just like passes, but unlike passes, they need not derive from the ``Pass`` +class. An analysis group may have one or more implementations, one of which is +the "default" implementation. + +Analysis groups are used by client passes just like other passes are: the +``AnalysisUsage::addRequired()`` and ``Pass::getAnalysis()`` methods. In order +to resolve this requirement, the :ref:`PassManager +` scans the available passes to see if any +implementations of the analysis group are available. If none is available, the +default implementation is created for the pass to use. All standard rules for +:ref:`interaction between passes ` still +apply. + +Although :ref:`Pass Registration ` is +optional for normal passes, all analysis group implementations must be +registered, and must use the :ref:`INITIALIZE_AG_PASS +` template to join the +implementation pool. Also, a default implementation of the interface **must** +be registered with :ref:`RegisterAnalysisGroup +`. + +As a concrete example of an Analysis Group in action, consider the +`AliasAnalysis `_ +analysis group. The default implementation of the alias analysis interface +(the `basicaa `_ pass) +just does a few simple checks that don't require significant analysis to +compute (such as: two different globals can never alias each other, etc). 
+Passes that use the `AliasAnalysis +`_ interface (for +example the `gcse `_ pass), do not +care which implementation of alias analysis is actually provided, they just use +the designated interface. + +From the user's perspective, commands work just like normal. Issuing the +command ``opt -gcse ...`` will cause the ``basicaa`` class to be instantiated +and added to the pass sequence. Issuing the command ``opt -somefancyaa -gcse +...`` will cause the ``gcse`` pass to use the ``somefancyaa`` alias analysis +(which doesn't actually exist, it's just a hypothetical example) instead. + +.. _writing-an-llvm-pass-RegisterAnalysisGroup: + +Using ``RegisterAnalysisGroup`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``RegisterAnalysisGroup`` template is used to register the analysis group +itself, while the ``INITIALIZE_AG_PASS`` is used to add pass implementations to +the analysis group. First, an analysis group should be registered, with a +human readable name provided for it. Unlike registration of passes, there is +no command line argument to be specified for the Analysis Group Interface +itself, because it is "abstract": + +.. code-block:: c++ + + static RegisterAnalysisGroup A("Alias Analysis"); + +Once the analysis is registered, passes can declare that they are valid +implementations of the interface by using the following code: + +.. code-block:: c++ + + namespace { + // Declare that we implement the AliasAnalysis interface + INITIALIZE_AG_PASS(FancyAA, AliasAnalysis , "somefancyaa", + "A more complex alias analysis implementation", + false, // Is CFG Only? + true, // Is Analysis? + false); // Is default Analysis Group implementation? + } + +This just shows a class ``FancyAA`` that uses the ``INITIALIZE_AG_PASS`` macro +both to register and to "join" the `AliasAnalysis +`_ analysis group. +Every implementation of an analysis group should join using this macro. + +.. 
code-block:: c++ + + namespace { + // Declare that we implement the AliasAnalysis interface + INITIALIZE_AG_PASS(BasicAA, AliasAnalysis, "basicaa", + "Basic Alias Analysis (default AA impl)", + false, // Is CFG Only? + true, // Is Analysis? + true); // Is default Analysis Group implementation? + } + +Here we show how the default implementation is specified (using the final +argument to the ``INITIALIZE_AG_PASS`` template). There must be exactly one +default implementation available at all times for an Analysis Group to be used. +Only default implementation can derive from ``ImmutablePass``. Here we declare +that the `BasicAliasAnalysis +`_ pass is the default +implementation for the interface. + +Pass Statistics +=============== + +The `Statistic `_ class is +designed to be an easy way to expose various success metrics from passes. +These statistics are printed at the end of a run, when the :option:`-stats` +command line option is enabled on the command line. See the :ref:`Statistics +section ` in the Programmer's Manual for details. + +.. _writing-an-llvm-pass-passmanager: + +What PassManager does +--------------------- + +The `PassManager `_ `class +`_ takes a list of +passes, ensures their :ref:`prerequisites ` +are set up correctly, and then schedules passes to run efficiently. All of the +LLVM tools that run passes use the PassManager for execution of these passes. + +The PassManager does two main things to try to reduce the execution time of a +series of passes: + +#. **Share analysis results.** The ``PassManager`` attempts to avoid + recomputing analysis results as much as possible. This means keeping track + of which analyses are available already, which analyses get invalidated, and + which analyses are needed to be run for a pass. An important part of work + is that the ``PassManager`` tracks the exact lifetime of all analysis + results, allowing it to :ref:`free memory + ` allocated to holding analysis results + as soon as they are no longer needed. 
+ +#. **Pipeline the execution of passes on the program.** The ``PassManager`` + attempts to get better cache and memory usage behavior out of a series of + passes by pipelining the passes together. This means that, given a series + of consecutive :ref:`FunctionPass `, it + will execute all of the :ref:`FunctionPass + ` on the first function, then all of the + :ref:`FunctionPasses ` on the second + function, etc... until the entire program has been run through the passes. + + This improves the cache behavior of the compiler, because it is only + touching the LLVM program representation for a single function at a time, + instead of traversing the entire program. It reduces the memory consumption + of compiler, because, for example, only one `DominatorSet + `_ needs to be + calculated at a time. This also makes it possible to implement some + :ref:`interesting enhancements ` in the future. + +The effectiveness of the ``PassManager`` is influenced directly by how much +information it has about the behaviors of the passes it is scheduling. For +example, the "preserved" set is intentionally conservative in the face of an +unimplemented :ref:`getAnalysisUsage ` +method. Not implementing when it should be implemented will have the effect of +not allowing any analysis results to live across the execution of your pass. + +The ``PassManager`` class exposes a ``--debug-pass`` command line options that +is useful for debugging pass execution, seeing how things work, and diagnosing +when you should be preserving more analyses than you currently are. (To get +information about all of the variants of the ``--debug-pass`` option, just type +"``opt -help-hidden``"). + +By using the --debug-pass=Structure option, for example, we can see how our +:ref:`Hello World ` pass interacts with other +passes. Lets try it out with the gcse and licm passes: + +.. 
code-block:: console + + $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null + Module Pass Manager + Function Pass Manager + Dominator Set Construction + Immediate Dominators Construction + Global Common Subexpression Elimination + -- Immediate Dominators Construction + -- Global Common Subexpression Elimination + Natural Loop Construction + Loop Invariant Code Motion + -- Natural Loop Construction + -- Loop Invariant Code Motion + Module Verifier + -- Dominator Set Construction + -- Module Verifier + Bitcode Writer + --Bitcode Writer + +This output shows us when passes are constructed and when the analysis results +are known to be dead (prefixed with "``--``"). Here we see that GCSE uses +dominator and immediate dominator information to do its job. The LICM pass +uses natural loop information, which uses dominator sets, but not immediate +dominators. Because immediate dominators are no longer useful after the GCSE +pass, it is immediately destroyed. The dominator sets are then reused to +compute natural loop information, which is then used by the LICM pass. + +After the LICM pass, the module verifier runs (which is automatically added by +the :program:`opt` tool), which uses the dominator set to check that the +resultant LLVM code is well formed. After it finishes, the dominator set +information is destroyed, after being computed once, and shared by three +passes. + +Lets see how this changes when we run the :ref:`Hello World +` pass in between the two passes: + +.. 
code-block:: console + + $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null + Module Pass Manager + Function Pass Manager + Dominator Set Construction + Immediate Dominators Construction + Global Common Subexpression Elimination + -- Dominator Set Construction + -- Immediate Dominators Construction + -- Global Common Subexpression Elimination + Hello World Pass + -- Hello World Pass + Dominator Set Construction + Natural Loop Construction + Loop Invariant Code Motion + -- Natural Loop Construction + -- Loop Invariant Code Motion + Module Verifier + -- Dominator Set Construction + -- Module Verifier + Bitcode Writer + --Bitcode Writer + Hello: __main + Hello: puts + Hello: main + +Here we see that the :ref:`Hello World ` pass +has killed the Dominator Set pass, even though it doesn't modify the code at +all! To fix this, we need to add the following :ref:`getAnalysisUsage +` method to our pass: + +.. code-block:: c++ + + // We don't modify the program, so we preserve all analyses + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + +Now when we run our pass, we get this output: + +.. 
code-block:: console + + $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null + Pass Arguments: -gcse -hello -licm + Module Pass Manager + Function Pass Manager + Dominator Set Construction + Immediate Dominators Construction + Global Common Subexpression Elimination + -- Immediate Dominators Construction + -- Global Common Subexpression Elimination + Hello World Pass + -- Hello World Pass + Natural Loop Construction + Loop Invariant Code Motion + -- Loop Invariant Code Motion + -- Natural Loop Construction + Module Verifier + -- Dominator Set Construction + -- Module Verifier + Bitcode Writer + --Bitcode Writer + Hello: __main + Hello: puts + Hello: main + +Which shows that we don't accidentally invalidate dominator information +anymore, and therefore do not have to compute it twice. + +.. _writing-an-llvm-pass-releaseMemory: + +The ``releaseMemory`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c++ + + virtual void releaseMemory(); + +The ``PassManager`` automatically determines when to compute analysis results, +and how long to keep them around for. Because the lifetime of the pass object +itself is effectively the entire duration of the compilation process, we need +some way to free analysis results when they are no longer useful. The +``releaseMemory`` virtual method is the way to do this. + +If you are writing an analysis or any other pass that retains a significant +amount of state (for use by another pass which "requires" your pass and uses +the :ref:`getAnalysis ` method) you should +implement ``releaseMemory`` to, well, release the memory allocated to maintain +this internal state. This method is called after the ``run*`` method for the +class, before the next call of ``run*`` in your pass. 
+ +Registering dynamically loaded passes +===================================== + +*Size matters* when constructing production quality tools using LLVM, both for +the purposes of distribution, and for regulating the resident code size when +running on the target system. Therefore, it becomes desirable to selectively +use some passes, while omitting others and maintain the flexibility to change +configurations later on. You want to be able to do all this, and, provide +feedback to the user. This is where pass registration comes into play. + +The fundamental mechanisms for pass registration are the +``MachinePassRegistry`` class and subclasses of ``MachinePassRegistryNode``. + +An instance of ``MachinePassRegistry`` is used to maintain a list of +``MachinePassRegistryNode`` objects. This instance maintains the list and +communicates additions and deletions to the command line interface. + +An instance of ``MachinePassRegistryNode`` subclass is used to maintain +information provided about a particular pass. This information includes the +command line name, the command help string and the address of the function used +to create an instance of the pass. A global static constructor of one of these +instances *registers* with a corresponding ``MachinePassRegistry``, the static +destructor *unregisters*. Thus a pass that is statically linked in the tool +will be registered at start up. A dynamically loaded pass will register on +load and unregister at unload. + +Using existing registries +------------------------- + +There are predefined registries to track instruction scheduling +(``RegisterScheduler``) and register allocation (``RegisterRegAlloc``) machine +passes. Here we will describe how to *register* a register allocator machine +pass. + +Implement your register allocator machine pass. In your register allocator +``.cpp`` file add the following include: + +.. 
code-block:: c++ + + #include "llvm/CodeGen/RegAllocRegistry.h" + +Also in your register allocator ``.cpp`` file, define a creator function in the +form: + +.. code-block:: c++ + + FunctionPass *createMyRegisterAllocator() { + return new MyRegisterAllocator(); + } + +Note that the signature of this function should match the type of +``RegisterRegAlloc::FunctionPassCtor``. In the same file add the "installing" +declaration, in the form: + +.. code-block:: c++ + + static RegisterRegAlloc myRegAlloc("myregalloc", + "my register allocator help string", + createMyRegisterAllocator); + +Note the two spaces prior to the help string produces a tidy result on the +:option:`-help` query. + +.. code-block:: console + + $ llc -help + ... + -regalloc - Register allocator to use (default=linearscan) + =linearscan - linear scan register allocator + =local - local register allocator + =simple - simple register allocator + =myregalloc - my register allocator help string + ... + +And that's it. The user is now free to use ``-regalloc=myregalloc`` as an +option. Registering instruction schedulers is similar except use the +``RegisterScheduler`` class. Note that the +``RegisterScheduler::FunctionPassCtor`` is significantly different from +``RegisterRegAlloc::FunctionPassCtor``. + +To force the load/linking of your register allocator into the +:program:`llc`/:program:`lli` tools, add your creator function's global +declaration to ``Passes.h`` and add a "pseudo" call line to +``llvm/Codegen/LinkAllCodegenComponents.h``. + +Creating new registries +----------------------- + +The easiest way to get started is to clone one of the existing registries; we +recommend ``llvm/CodeGen/RegAllocRegistry.h``. The key things to modify are +the class name and the ``FunctionPassCtor`` type. + +Then you need to declare the registry. Example: if your pass registry is +``RegisterMyPasses`` then define: + +.. 
code-block:: c++ + + MachinePassRegistry RegisterMyPasses::Registry; + +And finally, declare the command line option for your passes. Example: + +.. code-block:: c++ + + cl::opt > + MyPassOpt("mypass", + cl::init(&createDefaultMyPass), + cl::desc("my pass option help")); + +Here the command option is "``mypass``", with ``createDefaultMyPass`` as the +default creator. + +Using GDB with dynamically loaded passes +---------------------------------------- + +Unfortunately, using GDB with dynamically loaded passes is not as easy as it +should be. First of all, you can't set a breakpoint in a shared object that +has not been loaded yet, and second of all there are problems with inlined +functions in shared objects. Here are some suggestions to debugging your pass +with GDB. + +For sake of discussion, I'm going to assume that you are debugging a +transformation invoked by :program:`opt`, although nothing described here +depends on that. + +Setting a breakpoint in your pass +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First thing you do is start gdb on the opt process: + +.. code-block:: console + + $ gdb opt + GNU gdb 5.0 + Copyright 2000 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "sparc-sun-solaris2.6"... + (gdb) + +Note that :program:`opt` has a lot of debugging information in it, so it takes +time to load. Be patient. Since we cannot set a breakpoint in our pass yet +(the shared object isn't loaded until runtime), we must execute the process, +and have it stop before it invokes our pass, but after it has loaded the shared +object. The most foolproof way of doing this is to set a breakpoint in +``PassManager::run`` and then run the process with the arguments you want: + +.. 
code-block:: console + + $ (gdb) break llvm::PassManager::run + Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70. + (gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption] + Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption] + Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70 + 70 bool PassManager::run(Module &M) { return PM->run(M); } + (gdb) + +Once the :program:`opt` stops in the ``PassManager::run`` method you are now +free to set breakpoints in your pass so that you can trace through execution or +do other standard debugging stuff. + +Miscellaneous Problems +^^^^^^^^^^^^^^^^^^^^^^ + +Once you have the basics down, there are a couple of problems that GDB has, +some with solutions, some without. + +* Inline functions have bogus stack information. In general, GDB does a pretty + good job getting stack traces and stepping through inline functions. When a + pass is dynamically loaded however, it somehow completely loses this + capability. The only solution I know of is to de-inline a function (move it + from the body of a class to a ``.cpp`` file). + +* Restarting the program breaks breakpoints. After following the information + above, you have succeeded in getting some breakpoints planted in your pass. + Next thing you know, you restart the program (i.e., you type "``run``" again), + and you start getting errors about breakpoints being unsettable. The only + way I have found to "fix" this problem is to delete the breakpoints that are + already set in your pass, run the program, and re-set the breakpoints once + execution stops in ``PassManager::run``. + +Hopefully these tips will help with common case debugging situations. If you'd +like to contribute some tips of your own, just contact `Chris +`_. 
+ +Future extensions planned +------------------------- + +Although the LLVM Pass Infrastructure is very capable as it stands, and does +some nifty stuff, there are things we'd like to add in the future. Here is +where we are going: + +.. _writing-an-llvm-pass-SMP: + +Multithreaded LLVM +^^^^^^^^^^^^^^^^^^ + +Multiple CPU machines are becoming more common and compilation can never be +fast enough: obviously we should allow for a multithreaded compiler. Because +of the semantics defined for passes above (specifically they cannot maintain +state across invocations of their ``run*`` methods), a nice clean way to +implement a multithreaded compiler would be for the ``PassManager`` class to +create multiple instances of each pass object, and allow the separate instances +to be hacking on different parts of the program at the same time. + +This implementation would prevent each of the passes from having to implement +multithreaded constructs, requiring only the LLVM core to have locking in a few +places (for global resources). Although this is a simple extension, we simply +haven't had time (or multiprocessor machines, thus a reason) to implement this. +Despite that, we have kept the LLVM passes SMP ready, and you should too. + diff --git a/docs/YamlIO.rst b/docs/YamlIO.rst new file mode 100644 index 000000000000..ac50292f4a81 --- /dev/null +++ b/docs/YamlIO.rst @@ -0,0 +1,860 @@ +===================== +YAML I/O +===================== + +.. contents:: + :local: + +Introduction to YAML +==================== + +YAML is a human readable data serialization language. The full YAML language +spec can be read at `yaml.org +`_. The simplest form of +yaml is just "scalars", "mappings", and "sequences". A scalar is any number +or string. The pound/hash symbol (#) begins a comment line. A mapping is +a set of key-value pairs where the key ends with a colon. For example: + +.. 
code-block:: yaml + + # a mapping + name: Tom + hat-size: 7 + +A sequence is a list of items where each item starts with a leading dash ('-'). +For example: + +.. code-block:: yaml + + # a sequence + - x86 + - x86_64 + - PowerPC + +You can combine mappings and sequences by indenting. For example a sequence +of mappings in which one of the mapping values is itself a sequence: + +.. code-block:: yaml + + # a sequence of mappings with one key's value being a sequence + - name: Tom + cpus: + - x86 + - x86_64 + - name: Bob + cpus: + - x86 + - name: Dan + cpus: + - PowerPC + - x86 + +Sometime sequences are known to be short and the one entry per line is too +verbose, so YAML offers an alternate syntax for sequences called a "Flow +Sequence" in which you put comma separated sequence elements into square +brackets. The above example could then be simplified to : + + +.. code-block:: yaml + + # a sequence of mappings with one key's value being a flow sequence + - name: Tom + cpus: [ x86, x86_64 ] + - name: Bob + cpus: [ x86 ] + - name: Dan + cpus: [ PowerPC, x86 ] + + +Introduction to YAML I/O +======================== + +The use of indenting makes the YAML easy for a human to read and understand, +but having a program read and write YAML involves a lot of tedious details. +The YAML I/O library structures and simplifies reading and writing YAML +documents. + +YAML I/O assumes you have some "native" data structures which you want to be +able to dump as YAML and recreate from YAML. The first step is to try +writing example YAML for your data structures. You may find after looking at +possible YAML representations that a direct mapping of your data structures +to YAML is not very readable. Often the fields are not in the order that +a human would find readable. Or the same information is replicated in multiple +locations, making it hard for a human to write such YAML correctly. 
+ +In relational database theory there is a design step called normalization in +which you reorganize fields and tables. The same considerations need to +go into the design of your YAML encoding. But, you may not want to change +your existing native data structures. Therefore, when writing out YAML +there may be a normalization step, and when reading YAML there would be a +corresponding denormalization step. + +YAML I/O uses a non-invasive, traits based design. YAML I/O defines some +abstract base templates. You specialize those templates on your data types. +For instance, if you have an enumerated type FooBar you could specialize +ScalarEnumerationTraits on that type and define the enumeration() method: + +.. code-block:: c++ + + using llvm::yaml::ScalarEnumerationTraits; + using llvm::yaml::IO; + + template <> + struct ScalarEnumerationTraits { + static void enumeration(IO &io, FooBar &value) { + ... + } + }; + + +As with all YAML I/O template specializations, the ScalarEnumerationTraits is used for +both reading and writing YAML. That is, the mapping between in-memory enum +values and the YAML string representation is only in one place. +This assures that the code for writing and parsing of YAML stays in sync. + +To specify a YAML mapping, you define a specialization on +llvm::yaml::MappingTraits. +If your native data structure happens to be a struct that is already normalized, +then the specialization is simple. For example: + +.. code-block:: c++ + + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + template <> + struct MappingTraits { + static void mapping(IO &io, Person &info) { + io.mapRequired("name", info.name); + io.mapOptional("hat-size", info.hatSize); + } + }; + + +A YAML sequence is automatically inferred if your data type has begin()/end() +iterators and a push_back() method. Therefore any of the STL containers +(such as std::vector<>) will automatically translate to YAML sequences. 
+ +Once you have defined specializations for your data types, you can +programmatically use YAML I/O to write a YAML document: + +.. code-block:: c++ + + using llvm::yaml::Output; + + Person tom; + tom.name = "Tom"; + tom.hatSize = 8; + Person dan; + dan.name = "Dan"; + dan.hatSize = 7; + std::vector persons; + persons.push_back(tom); + persons.push_back(dan); + + Output yout(llvm::outs()); + yout << persons; + +This would write the following: + +.. code-block:: yaml + + - name: Tom + hat-size: 8 + - name: Dan + hat-size: 7 + +And you can also read such YAML documents with the following code: + +.. code-block:: c++ + + using llvm::yaml::Input; + + typedef std::vector PersonList; + std::vector docs; + + Input yin(document.getBuffer()); + yin >> docs; + + if ( yin.error() ) + return; + + // Process read document + for ( PersonList &pl : docs ) { + for ( Person &person : pl ) { + cout << "name=" << person.name; + } + } + +One other feature of YAML is the ability to define multiple documents in a +single file. That is why reading YAML produces a vector of your document type. + + + +Error Handling +============== + +When parsing a YAML document, if the input does not match your schema (as +expressed in your XxxTraits<> specializations). YAML I/O +will print out an error message and your Input object's error() method will +return true. For instance the following document: + +.. code-block:: yaml + + - name: Tom + shoe-size: 12 + - name: Dan + hat-size: 7 + +Has a key (shoe-size) that is not defined in the schema. YAML I/O will +automatically generate this error: + +.. code-block:: yaml + + YAML:2:2: error: unknown key 'shoe-size' + shoe-size: 12 + ^~~~~~~~~ + +Similar errors are produced for other input not conforming to the schema. + + +Scalars +======= + +YAML scalars are just strings (i.e. not a sequence or mapping). The YAML I/O +library provides support for translating between YAML scalars and specific +C++ types. 
+ + +Built-in types +-------------- +The following types have built-in support in YAML I/O: + +* bool +* float +* double +* StringRef +* int64_t +* int32_t +* int16_t +* int8_t +* uint64_t +* uint32_t +* uint16_t +* uint8_t + +That is, you can use those types in fields of MappingTraits or as element type +in sequence. When reading, YAML I/O will validate that the string found +is convertible to that type and error out if not. + + +Unique types +------------ +Given that YAML I/O is trait based, the selection of how to convert your data +to YAML is based on the type of your data. But in C++ type matching, typedefs +do not generate unique type names. That means if you have two typedefs of +unsigned int, to YAML I/O both types look exactly like unsigned int. To +facilitate making unique type names, YAML I/O provides a macro which is used +like a typedef on built-in types, but expands to create a class with conversion +operators to and from the base type. For example: + +.. code-block:: c++ + + LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyFooFlags) + LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyBarFlags) + +This generates two classes MyFooFlags and MyBarFlags which you can use in your +native data structures instead of uint32_t. They are implicitly +converted to and from uint32_t. The point of creating these unique types +is that you can now specify traits on them to get different YAML conversions. + +Hex types +--------- +An example use of a unique type is that YAML I/O provides fixed sized unsigned +integers that are written with YAML I/O as hexadecimal instead of the decimal +format used by the built-in integer types: + +* Hex64 +* Hex32 +* Hex16 +* Hex8 + +You can use llvm::yaml::Hex32 instead of uint32_t and the only difference will +be that when YAML I/O writes out that type it will be formatted in hexadecimal. + + +ScalarEnumerationTraits +----------------------- +YAML I/O supports translating between in-memory enumerations and a set of string +values in YAML documents. 
This is done by specializing ScalarEnumerationTraits<> +on your enumeration type and defining an enumeration() method. +For instance, suppose you had an enumeration of CPUs and a struct with it as +a field: + +.. code-block:: c++ + + enum CPUs { + cpu_x86_64 = 5, + cpu_x86 = 7, + cpu_PowerPC = 8 + }; + + struct Info { + CPUs cpu; + uint32_t flags; + }; + +To support reading and writing of this enumeration, you can define a +ScalarEnumerationTraits specialization on CPUs, which can then be used +as a field type: + +.. code-block:: c++ + + using llvm::yaml::ScalarEnumerationTraits; + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + template <> + struct ScalarEnumerationTraits { + static void enumeration(IO &io, CPUs &value) { + io.enumCase(value, "x86_64", cpu_x86_64); + io.enumCase(value, "x86", cpu_x86); + io.enumCase(value, "PowerPC", cpu_PowerPC); + } + }; + + template <> + struct MappingTraits { + static void mapping(IO &io, Info &info) { + io.mapRequired("cpu", info.cpu); + io.mapOptional("flags", info.flags, 0); + } + }; + +When reading YAML, if the string found does not match any of the strings +specified by enumCase() methods, an error is automatically generated. +When writing YAML, if the value being written does not match any of the values +specified by the enumCase() methods, a runtime assertion is triggered. + + +BitValue +-------- +Another common data structure in C++ is a field where each bit has a unique +meaning. This is often used in a "flags" field. YAML I/O has support for +converting such fields to a flow sequence. For instance suppose you +had the following bit flags defined: + +.. code-block:: c++ + + enum { + flagsPointy = 1 + flagsHollow = 2 + flagsFlat = 4 + flagsRound = 8 + }; + + LLVM_YAML_UNIQUE_TYPE(MyFlags, uint32_t) + +To support reading and writing of MyFlags, you specialize ScalarBitSetTraits<> +on MyFlags and provide the bit values and their names. + +.. 
code-block:: c++ + + using llvm::yaml::ScalarBitSetTraits; + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + template <> + struct ScalarBitSetTraits { + static void bitset(IO &io, MyFlags &value) { + io.bitSetCase(value, "hollow", flagHollow); + io.bitSetCase(value, "flat", flagFlat); + io.bitSetCase(value, "round", flagRound); + io.bitSetCase(value, "pointy", flagPointy); + } + }; + + struct Info { + StringRef name; + MyFlags flags; + }; + + template <> + struct MappingTraits { + static void mapping(IO &io, Info& info) { + io.mapRequired("name", info.name); + io.mapRequired("flags", info.flags); + } + }; + +With the above, YAML I/O (when writing) will test each mask value in the +bitset trait against the flags field, and each that matches will +cause the corresponding string to be added to the flow sequence. The opposite +is done when reading and any unknown string values will result in an error. With +the above schema, a sample valid YAML document is: + +.. code-block:: yaml + + name: Tom + flags: [ pointy, flat ] + + +Custom Scalar +------------- +Sometimes for readability a scalar needs to be formatted in a custom way. For +instance your internal data structure may use an integer for time (seconds since +some epoch), but in YAML it would be much nicer to express that integer in +some time format (e.g. 4-May-2012 10:30pm). YAML I/O has a way to support +custom formatting and parsing of scalar types by specializing ScalarTraits<> on +your data type. When writing, YAML I/O will provide the native type and +your specialization must create a temporary llvm::StringRef. When reading, +YAML I/O will provide a llvm::StringRef of scalar and your specialization +must convert that to your native data type. An outline of a custom scalar type +looks like: + +.. 
code-block:: c++ + + using llvm::yaml::ScalarTraits; + using llvm::yaml::IO; + + template <> + struct ScalarTraits { + static void output(const T &value, llvm::raw_ostream &out) { + out << value; // do custom formatting here + } + static StringRef input(StringRef scalar, T &value) { + // do custom parsing here. Return the empty string on success, + // or an error message on failure. + return StringRef(); + } + }; + + +Mappings +======== + +To be translated to or from a YAML mapping for your type T you must specialize +llvm::yaml::MappingTraits on T and implement the "void mapping(IO &io, T&)" +method. If your native data structures use pointers to a class everywhere, +you can specialize on the class pointer. Examples: + +.. code-block:: c++ + + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + // Example of struct Foo which is used by value + template <> + struct MappingTraits { + static void mapping(IO &io, Foo &foo) { + io.mapOptional("size", foo.size); + ... + } + }; + + // Example of struct Bar which is natively always a pointer + template <> + struct MappingTraits { + static void mapping(IO &io, Bar *&bar) { + io.mapOptional("size", bar->size); + ... + } + }; + + +No Normalization +---------------- + +The mapping() method is responsible, if needed, for normalizing and +denormalizing. In a simple case where the native data structure requires no +normalization, the mapping method just uses mapOptional() or mapRequired() to +bind the struct's fields to YAML key names. For example: + +.. code-block:: c++ + + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + template <> + struct MappingTraits { + static void mapping(IO &io, Person &info) { + io.mapRequired("name", info.name); + io.mapOptional("hat-size", info.hatSize); + } + }; + + +Normalization +---------------- + +When [de]normalization is required, the mapping() method needs a way to access +normalized values as fields. 
To help with this, there is +a template MappingNormalization<> which you can then use to automatically +do the normalization and denormalization. The template is used to create +a local variable in your mapping() method which contains the normalized keys. + +Suppose you have a native data type +Polar which specifies a position in polar coordinates (distance, angle): + +.. code-block:: c++ + + struct Polar { + float distance; + float angle; + }; + +but you've decided the normalized YAML form should be in x,y coordinates. That +is, you want the yaml to look like: + +.. code-block:: yaml + + x: 10.3 + y: -4.7 + +You can support this by defining a MappingTraits that normalizes the polar +coordinates to x,y coordinates when writing YAML and denormalizes x,y +coordinates into polar when reading YAML. + +.. code-block:: c++ + + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + template <> + struct MappingTraits { + + class NormalizedPolar { + public: + NormalizedPolar(IO &io) + : x(0.0), y(0.0) { + } + NormalizedPolar(IO &, Polar &polar) + : x(polar.distance * cos(polar.angle)), + y(polar.distance * sin(polar.angle)) { + } + Polar denormalize(IO &) { + return Polar(sqrt(x*x+y*y, arctan(x,y)); + } + + float x; + float y; + }; + + static void mapping(IO &io, Polar &polar) { + MappingNormalization keys(io, polar); + + io.mapRequired("x", keys->x); + io.mapRequired("y", keys->y); + } + }; + +When writing YAML, the local variable "keys" will be a stack allocated +instance of NormalizedPolar, constructed from the supplied polar object which +initializes its x and y fields. The mapRequired() methods then write out the x +and y values as key/value pairs. + +When reading YAML, the local variable "keys" will be a stack allocated instance +of NormalizedPolar, constructed by the empty constructor. The mapRequired +methods will find the matching key in the YAML document and fill in the x and y +fields of the NormalizedPolar object keys. 
At the end of the mapping() method +when the local keys variable goes out of scope, the denormalize() method will +automatically be called to convert the read values back to polar coordinates, +and then assigned back to the second parameter to mapping(). + +In some cases, the normalized class may be a subclass of the native type and +could be returned by the denormalize() method, except that the temporary +normalized instance is stack allocated. In these cases, the utility template +MappingNormalizationHeap<> can be used instead. It is just like +MappingNormalization<> except that it heap allocates the normalized object +when reading YAML. It never destroys the normalized object. The denormalize() +method can thus return "this". + + +Default values +-------------- +Within a mapping() method, calls to io.mapRequired() mean that that key is +required to exist when parsing YAML documents, otherwise YAML I/O will issue an +error. + +On the other hand, keys registered with io.mapOptional() are allowed to not +exist in the YAML document being read. So what value is put in the field +for those optional keys? +There are two steps to how those optional fields are filled in. First, the +second parameter to the mapping() method is a reference to a native class. That +native class must have a default constructor. Whatever value the default +constructor initially sets for an optional field will be that field's value. +Second, the mapOptional() method has an optional third parameter. If provided +it is the value that mapOptional() should set that field to if the YAML document +does not have that key. + +There is one important difference between those two ways (default constructor +and third parameter to mapOptional). When YAML I/O generates a YAML document, +if the mapOptional() third parameter is used, if the actual value being written +is the same as (using ==) the default value, then that key/value is not written.
+ + +Order of Keys +-------------- + +When writing out a YAML document, the keys are written in the order that the +calls to mapRequired()/mapOptional() are made in the mapping() method. This +gives you a chance to write the fields in an order that a human reader of +the YAML document would find natural. This may be different than the order +of the fields in the native class. + +When reading in a YAML document, the keys in the document can be in any order, +but they are processed in the order that the calls to mapRequired()/mapOptional() +are made in the mapping() method. That enables some interesting +functionality. For instance, if the first field bound is the cpu and the second +field bound is flags, and the flags are cpu specific, you can programmatically +switch how the flags are converted to and from YAML based on the cpu. +This works for both reading and writing. For example: + +.. code-block:: c++ + + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + struct Info { + CPUs cpu; + uint32_t flags; + }; + + template <> + struct MappingTraits<Info> { + static void mapping(IO &io, Info &info) { + io.mapRequired("cpu", info.cpu); + // flags must come after cpu for this to work when reading yaml + if ( info.cpu == cpu_x86_64 ) + io.mapRequired("flags", *(My86_64Flags*)info.flags); + else + io.mapRequired("flags", *(My86Flags*)info.flags); + } + }; + + +Sequence +======== + +To be translated to or from a YAML sequence for your type T you must specialize +llvm::yaml::SequenceTraits on T and implement two methods: +``size_t size(IO &io, T&)`` and +``T::value_type& element(IO &io, T&, size_t indx)``. For example: + +.. code-block:: c++ + + template <> + struct SequenceTraits<MySeq> { + static size_t size(IO &io, MySeq &list) { ... } + static MySeqEl element(IO &io, MySeq &list, size_t index) { ... } + }; + +The size() method returns how many elements are currently in your sequence. +The element() method returns a reference to the i'th element in the sequence.
+When parsing YAML, the element() method may be called with an index one bigger +than the current size. Your element() method should allocate space for one +more element (using default constructor if element is a C++ object) and return +a reference to that newly allocated space. + + +Flow Sequence +------------- +A YAML "flow sequence" is a sequence that, when written to YAML, uses the +inline notation (e.g. [ foo, bar ] ). To specify that a sequence type should +be written in YAML as a flow sequence, your SequenceTraits specialization should +add "static const bool flow = true;". For instance: + +.. code-block:: c++ + + template <> + struct SequenceTraits<MyList> { + static size_t size(IO &io, MyList &list) { ... } + static MyListEl element(IO &io, MyList &list, size_t index) { ... } + + // The existence of this member causes YAML I/O to use a flow sequence + static const bool flow = true; + }; + +With the above, if you used MyList as the data type in your native data +structures, then when converted to YAML, a flow sequence of integers +will be used (e.g. [ 10, -3, 4 ]). + + +Utility Macros +-------------- +Since a common source of sequences is std::vector<>, YAML I/O provides macros: +LLVM_YAML_IS_SEQUENCE_VECTOR() and LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR() which +can be used to easily specify SequenceTraits<> on a std::vector type. YAML +I/O does not partial specialize SequenceTraits on std::vector<> because that +would force all vectors to be sequences. An example use of the macros: + +.. code-block:: c++ + + std::vector<MyType1>; + std::vector<MyType2>; + LLVM_YAML_IS_SEQUENCE_VECTOR(MyType1) + LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(MyType2) + + + +Document List +============= + +YAML allows you to define multiple "documents" in a single YAML file. Each +new document starts with a left aligned "---" token. The end of all documents +is denoted with a left aligned "..." token. Many users of YAML will never +have need for multiple documents.
The top level node in their YAML schema +will be a mapping or sequence. For those cases, the following is not needed. +But for cases where you do want multiple documents, you can specify a +trait for your document list type. The trait has the same methods as +SequenceTraits but is named DocumentListTraits. For example: + +.. code-block:: c++ + + template <> + struct DocumentListTraits<MyDocList> { + static size_t size(IO &io, MyDocList &list) { ... } + static MyDocType element(IO &io, MyDocList &list, size_t index) { ... } + }; + + +User Context Data +================= +When an llvm::yaml::Input or llvm::yaml::Output object is created, its +constructor takes an optional "context" parameter. This is a pointer to +whatever state information you might need. + +For instance, in a previous example we showed how the conversion type for a +flags field could be determined at runtime based on the value of another field +in the mapping. But what if an inner mapping needs to know some field value +of an outer mapping? That is where the "context" parameter comes in. You +can set values in the context in the outer map's mapping() method and +retrieve those values in the inner map's mapping() method. + +The context value is just a void*. All your traits which use the context +and operate on your native data types, need to agree what the context value +actually is. It could be a pointer to an object or struct which your various +traits use to share context sensitive information. + + +Output +====== + +The llvm::yaml::Output class is used to generate a YAML document from your +in-memory data structures, using traits defined on your data types. +To instantiate an Output object you need an llvm::raw_ostream, and optionally +a context pointer: + +.. code-block:: c++ + + class Output : public IO { + public: + Output(llvm::raw_ostream &, void *context=NULL); + +Once you have an Output object, you can use the C++ stream operator on it +to write your native data as YAML.
One thing to recall is that a YAML file +can contain multiple "documents". If the top level data structure you are +streaming as YAML is a mapping, scalar, or sequence, then Output assumes you +are generating one document and wraps the mapping output +with "``---``" and trailing "``...``". + +.. code-block:: c++ + + using llvm::yaml::Output; + + void dumpMyMapDoc(const MyMapType &info) { + Output yout(llvm::outs()); + yout << info; + } + +The above could produce output like: + +.. code-block:: yaml + + --- + name: Tom + hat-size: 7 + ... + +On the other hand, if the top level data structure you are streaming as YAML +has a DocumentListTraits specialization, then Output walks through each element +of your DocumentList and generates a "---" before the start of each element +and ends with a "...". + +.. code-block:: c++ + + using llvm::yaml::Output; + + void dumpMyMapDoc(const MyDocListType &docList) { + Output yout(llvm::outs()); + yout << docList; + } + +The above could produce output like: + +.. code-block:: yaml + + --- + name: Tom + hat-size: 7 + --- + name: Tom + shoe-size: 11 + ... + +Input +===== + +The llvm::yaml::Input class is used to parse YAML document(s) into your native +data structures. To instantiate an Input +object you need a StringRef to the entire YAML file, and optionally a context +pointer: + +.. code-block:: c++ + + class Input : public IO { + public: + Input(StringRef inputContent, void *context=NULL); + +Once you have an Input object, you can use the C++ stream operator to read +the document(s). If you expect there might be multiple YAML documents in +one file, you'll need to specialize DocumentListTraits on a list of your +document type and stream in that document list type. Otherwise you can +just stream in the document type. Also, you can check if there were +any syntax errors in the YAML by calling the error() method on the Input +object. For example: + +..
code-block:: c++ + + // Reading a single document + using llvm::yaml::Input; + + Input yin(mb.getBuffer()); + + // Parse the YAML file + MyDocType theDoc; + yin >> theDoc; + + // Check for error + if ( yin.error() ) + return; + + +.. code-block:: c++ + + // Reading multiple documents in one file + using llvm::yaml::Input; + + LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(std::vector) + + Input yin(mb.getBuffer()); + + // Parse the YAML file + std::vector theDocList; + yin >> theDocList; + + // Check for error + if ( yin.error() ) + return; + + diff --git a/docs/conf.py b/docs/conf.py index a1e9b5f6e286..0ac3b7836b9e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,16 +40,16 @@ master_doc = 'index' # General information about the project. project = u'LLVM' -copyright = u'2012, LLVM Project' +copyright = u'2003-2013, LLVM Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '3.2' +version = '3.3' # The full version, including alpha/beta/rc tags. -release = '3.2' +release = '3.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -95,7 +95,7 @@ html_theme = 'llvm-theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +html_theme_options = { "nosidebar": True } # Add any paths that contain custom themes here, relative to this directory. html_theme_path = ["_themes"] diff --git a/docs/design_and_overview.rst b/docs/design_and_overview.rst deleted file mode 100644 index ea684155e00f..000000000000 --- a/docs/design_and_overview.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. _design_and_overview: - -LLVM Design & Overview -====================== - -.. 
toctree:: - :hidden: - - GetElementPtr - -* `LLVM Language Reference Manual `_ - - Defines the LLVM intermediate representation. - -* `Introduction to the LLVM Compiler `_ - - Presentation providing a users introduction to LLVM. - -* `Intro to LLVM `_ - - Book chapter providing a compiler hacker's introduction to LLVM. - -* `LLVM: A Compilation Framework forLifelong Program Analysis & Transformation - `_ - - Design overview. - -* `LLVM: An Infrastructure for Multi-Stage Optimization - `_ - - More details (quite old now). - -* :ref:`gep` - - Answers to some very frequent questions about LLVM's most frequently - misunderstood instruction. diff --git a/docs/development_process.rst b/docs/development_process.rst deleted file mode 100644 index 4fc20b34129d..000000000000 --- a/docs/development_process.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _development_process: - -Development Process Documentation -================================= - -.. toctree:: - :hidden: - - MakefileGuide - Projects - -* :ref:`projects` - - How-to guide and templates for new projects that *use* the LLVM - infrastructure. The templates (directory organization, Makefiles, and test - tree) allow the project code to be located outside (or inside) the ``llvm/`` - tree, while using LLVM header files and libraries. - -* `LLVMBuild Documentation `_ - - Describes the LLVMBuild organization and files used by LLVM to specify - component descriptions. - -* :ref:`makefile_guide` - - Describes how the LLVM makefiles work and how to use them. - -* `How To Release LLVM To The Public `_ - - This is a guide to preparing LLVM releases. Most developers can ignore it. diff --git a/docs/doxygen.footer b/docs/doxygen.footer index c492e7df6cba..95d5434f6712 100644 --- a/docs/doxygen.footer +++ b/docs/doxygen.footer @@ -3,7 +3,7 @@ Generated on $datetime for $projectname by Doxygen$doxygenversion
-Copyright © 2003-2012 University of Illinois at Urbana-Champaign. +Copyright © 2003-2013 University of Illinois at Urbana-Champaign. All Rights Reserved.


diff --git a/docs/gcc-loops.png b/docs/gcc-loops.png new file mode 100644 index 0000000000000000000000000000000000000000..8923a3115325c6b0bc86c1f592e5e8b74bd02a46 GIT binary patch literal 21535 zcmdqJby$_#+b)VUqJSbG(hZV=NSB~==Om>Yll7qdOrHv^P67Q`5^;!E#wL8K=_9|I~B9vN^fx#`s z>Z&?5B=-uxqf19+Z^#g4!^ZB@F7opPeI&t{gjIb~^z`RJKfNY8j)E&CNOcq4$aDOl zsX|&c@ct)hDbLk!xK#8TXzD-eT#Oi0zv=EDck<2htq82>Pf7?GSXL$wtG|uSFG}vE zjfu)7j82gc9w;A@MH$M6DX>O#cHd@eCOGVY{~!y-?A(ahJXbiNY4ix0>JOq;Y_cOD z^CtdvL%D0nx=Qv=BgV-u%WCwgUDBU}k5G~Gk>k9RZoBojG6*S9iC~5H-G>ttV%DPz z){mL7R~s6j+--~cp>9V~OKlcWUKnZU(EEcu>_zp13tYBD=e>2kS6U}Y*)_(Waf4=< z57ZihL`Ez3NPo(f7!=n?8~URC3OgR~LUyB2-uoEo?KHC@X<$%zX9j%+&9nb`F1GxM zmGc8n(z~Vo`*iRRZ8W^2&gQqvvxxk*G=5c#8Lvjt$ToFqJhxx6EF|@iAqKWX->49}8^twbSqcwc@~Sn`w8xu!Yvw@5+dv ztQpGL0dp98xo3Rq-f?%>`J03iS>`<&#l8m*bvE!RG4L&%c?N`Yt*u4YaRfWi z>wfiYG+0|Zoxa$8KOJJ*`l!V2d)1d;W5uPcr?x7{0?#~xW79i+x_rTMqahR0y*5a9 z6K+2T>6=iQD9y}XJM!Gyx6Yd(Q`O)|3JmdVMU7kT6(a@}b)oWsb99r-D-ppr= z-VEOCXL7+#kXq_9TD_Qt2sSjnx-Dob-I!;wfg%2Mu%5-Rbt*T5szQh7BGFRxY5i%L zAW6xc8UymE+(qcmb*zK#8)szDyH3~3t>h~`S+`N|KE$O%Pevd8uGaE8--!@1nHrU& z@~G|m*d9%;i*C|VY_J=DN-%>|&{l}zx7K+s5&!g};`1$8-S>)*2 zxaO1jUuZnpyY7_O*RAJ+K8uqfo$oU8sDnUry!cZw6P8!UXHLCg$v}echXlP#jE`JD zL%)P%(l)%J4qX+SIca#}GJV5#Q%553;)?)W!$Kzy-+KdZzwzG0o1Y3^%Ul^D82h!* z`IStmMf3<&{*u`fGMMcAv{QAzo8O^&FN1^t1<$>UBA$B!2GkdX{ieGj5TqAwHYCjW zAZ%bgmx4GVAz_0*W8+^#N=Uj3BFeC(nwFE6yd1x=y)B!eiM^32o15)(a5oZ?pc_B< zqphivA*GwGjh!REn-KNyJNUt$5trGiDSzMMWGzIkC9gy&Ztq}9$<6kFjgwmVIwd8g zpo57Szly}ue;)^b6QZ_oa(d3s&hF~!%I3<&X76Cm{(z569VOzuhDP?zPD0ewh!0)<_s=+;EX}UIWas$rw7>+} z5&y#efQ^Iw^4;K3LBv&laeLe64yKNd;Qhi61b^T8U#?w!=b!tPEbN`^ffXDqjiv3J zOdY_(PKJol33LAY^#A&f|NbsT2TM~h>)%iR`S#z}{=Hw29Wn904DpYZe_sV=7QQaX zez|1A*B2whn2?Y}k)$QW)ZDIZOk+*S4<9#euXKk;Gk%qS&6zDn6|JvwPf1FmdQszH z5c*+8kPOwmEp;|JOHMjfXBvV}lk2hQUkN@&)6N^jCtvuCKs{%@XAbMlh0V9hXJ?PS zcu&?X3(xdUk`DQ-i=i(1Z8DD^BV9wq6aDvpl5>>i!B|&+N5$hnMr|ig#S9^2jSOLL z&{`WUW?$-zi&ItLd-4W{?Eb~^Xd`v(dJB<3^q``Mp4CtuRUo-X-q0s?S&xHtCirNv 
zV(0N@^5W7`&^Zm29}+SqCHP;9JZ0D>;f(sxd!8A_FyZ68^~vgDC8|)~dCa5J-QK0u z%rKW&*zrPZSlrpkp}_8X&{rIRF4Pg{wV_gT=?u64$+_znoN!o#s?wXo-*3dE#B;i) z9VztbN1VP#J8FHlOiUy;+h@@n#Z8hj<| zIgbg;+;*;BK^gvYOdat4-YL>I^dHh4vkhvd#M72kaBtx z=PgVK>l7992i#=m5eOBtmTR$5&H|)&rg8rj)liz)tMK7_bPaVe2t8B(^ z*KSlzKJHHB!Bx*wq8cjFtLyvl!1%F%>*fueFrI%W?@x`Jq9M9PxOibzFJxK(=h>+Hih$?C`Aipx-r=#nM)CuEEUFmT#BVwjViKm6G)r~^!*;YkYJs0ZrhX7c4O%Hq`oQ|dAI z)w@_W9BwJfIGts@bad69^}T?)d-m|H@RoCHjbs(J|1;T}p&LW#;uu^Xr+L<$dW8<9 zpdR~+xv4(4$&iaRi}lSg1@n8Wpx28p*H{}3Gjgzl_@ZZRq|jO;h2Qy8dBkwP?p7q% zMN<4la>?i@)!cRZWY+;{hW68=-Q*`hMO!v^O&_gp(Q&n2QHhM)46pwTxy(Xujk{I9_0?!Dx?Z~C&@2> zMQI$CAsx8IHsf=$p%g_g-*wLKvc9^$_46w(&+aDla3(y|X&VHL#6yj~&4RC}|3nu| z%5Vk$izkM#Ddv-4*=HjK5Gre04E&$t*D}vOi6`GA=A^X|8R2caH9Bk?DzN!EoLj>| z1|Dc@11;?|>b+P_w0}IDuj#rx*;zR9qi?x8abx)Lc9FTg{d>DxRf^vBLNEHpXWdtl ziK?b%HjC;u9sa8?iZ?-S#l=b25!L_vf}O$B;C-@Doiq8G;7$U@D=mMA^bDA|1ijpc zXXN6AUvppA_XUAG?r z4jYxD!Na$=3RT9%h%5k#S4E6YrE`6Glvgy4iv8ZX>AHPFFu3-gk zKDjRmyY<&yN}MPsCUKbG=tOx!K7Y`{`qxVY@zUtVQJl&o{=@~mpVdfWWsUH_fy?-h z=|(9UzJ84#Lj5NrVE)qaqa2^z7l&PiTR}WZsRN{BmaC7Rf^n>G9j7~84UHTVkJi&q zA@=IZIU#t=^-PKXDvjZyVnzyyGRR(C@x{iYJ*o3yB>UqsetsB;OqUi#cXg!^?6)&L zk!YMh8$h%P<=RUV+`jhPS661iW9LN_GJ*f*HcZMne=uK+Rg<8rEB4?q-%;Fb@hhW< zre$Hcn{js*>Qr}t)X6Fv%M$SlhhJ)<+MeC}crW5Q!Tk>ouOCwpKeGA2V$h(wv$KR$5pnIW{ojgoyi38m(GZX`A&51)uQKSjbyFM z#{D=0pPE&9j|oZl^|8{7=5qSvT%NsgrvAeNm!NtOlUO2#Aw-ueNEw1`J6RRegwdYr zwG(uMTu`ecj@{mZy=L(?KF4Gazwb&v2=M4NW zxYLiT-!a7$sAS8iR9dNQ6F@Q*)BU5|T=w`U*-m2mErz3~_9xb0=EOeFFdme7afKb*6YKN{@Pv#*-@bIgh z_L2_o7bTz8a7+Kya+ruy{+wnl^OqvO+FH%GDWz~Uyd551U9!Z(JjHu%$sq%0Q`taH ziHnOH*|{^Ctc)A~(OoynwS~=Hpb8NRIH^Dg!5Y4M;sqbv*x3v=Fe)Hu`0Si{eV_{F z``rV_;7gqenvbVv#Od)S9Lv^63x42WabAOEt2(8DW9tTaJj)`pTC!`EMCZ=kD{GNR zS#%0^O?-sgZN5ijIXk$%zr?8%OV|*;BuWal zxVEz_t@urs-6jC#aX2Zu%CGn6ZSGrw`fg&;d{kJsXmy17)s{e#&y;$iFA+{U9#DgL z;eF&iX@w`fl9PyCgs5>5f%ts50HR9BdVeb8@MNp8lgxFB$~?tAht&6UdnZ@qO!|B~ z%=Zp|g4-x_MY{F(p^vKa?z-jIVIOjs|0n)`XEO6$uJ 
zNk@1ThVxM*ovd7m*KRjYrzC}U^iPi-^jq`|$7{^)2jk{6ELf>aY~>2p&s;ZaYNoTP zbb1<3cS78|JhJ9of#2?c*wV2+Uf#|7Lwg03>bR}F{;u2F%T4Jf(6?e`o~PTbOBd%` z7b=g27>cY=#In+KT#IHQxhWo2V^Nx_J<(*qx(e6vXje=s1Bi}$eb39^kqKz9)vjig zpn0Dh#7x>Z>VZCEt!oq`m(+W&-(ghGRXXvJ?P6YLXr!?Baht*U4iPb*gUJ^-AIY5$ zW7YPly1r-o%VMl(gipNlCt;-)vaW2oHhcyk6LQ*&mAq9$^c#p4z+o{$Qf6bK@=0%$ zqR839`C6Y8;^OR+yCcO0?7pW96xoJf-#Dz7wsu(NCeyP+_ae)Kt3-u&ySN5wovrzn zKNxqZ-#sgZDI90@L@5eGKxM*M+<5NM#Z0|vd&WsdTZibEp1!}QPP=h_cCysYkQ(i> zF(G|+yq^s=V;Sfd-ooLoQ6s+1iFEtW&4rvqJBO;et%j2<=_q=$#$J(&`c>EHWT!I{ zyU)$K?fn?)OY-*RZR?Pdv=1ElYiH}%6Gp>2sPFs!?#DgAn zg*v+>geb(Nre9>uz-v3;28FP04?ny>D*xC+pYy4vo^uS$XlKH@IOu$^a|O*;PH_7h z{<2jEs2OdGzO05}3UtSFwR+hh+Jjx=-ZSw*eFh@^S0=fdE$2tQz8w>l)-(f(DT0uO zvjd&o*TdDbK05cft!OhP!$Ln7opTJZ$!u;3>b*eZj9dk>-LEpU);txr>15hKBeJyR zdr`LepwSpOR{GA61h!trD7Zs6@)S%JF0c?^)N-1BHwJ>=B zOQb@v%43(!Daf9r30n^3cnk|5tJY`_lZ|XUS;UwMs;5o#AG&29Dj_J3F210+XEn zpd@1l@$|&`?l$J0L)l3xH1a@})h(!Bf7$1~tI**i{z7k)N^aBBU)%FIwb(^Ourr=n zFB*}H)A9f zsIJ)`_NJ8DP4hdd=AQ`@>C9EGiwypz++E-pm)5MUpx`geMSKqB!?L;Fo}Qj3BTk*194Bhr97$Y z)QgN}i{>mES_|2C$@7q2ve)nmdfVh0@NCo?TY9r@*0b+ys%+5j3%RZMy&e%s_?3MP zD-+1MaUqVZEyij;KkO~{UPD3uaDM1I4CAzt8QI6 zpK96VsfNQD&nFi(C+ODGq&~lX7^*wXHC^kE5X#8*N5SZz2@^EZ=-2e#Qt(*F#A#nS z7uG8`dtV-$!VvTbotFeL{syA9ZhN2EaPCpB>j!*;10{Q-D@o4t50g;iHI`HlBJSjmOh$@s2ttNbnby zqnNJzOVO%d?1ly-^eRkwwFgeMOuO8bwr2r)a$Lejz}SXadur7 ze@4qYVy>)T)NY`LQ*urZKLBmY@75Qj_-#V^#p;9{@i z!%PV=Gpyw8LE}sKnsBPe8j~aWsuXecltT!yL%+V_D`zrp)7DV34Dq2S@Gr*pUqK95 z3yy`eYUr=3TGaCEY+6`WJ`$?p8_#LpxQ zXewE(MKcN|#H4;l0Zw|4uMt%lYf66nqzO3YIbSW7v}L|a*$)l+j1}rsJ`%yiCv#4- z9xXC633u$_liZWcLYD-XQwKLc)~95b!afrAX=(ekLAJY~pRcueegZ1tkQqN>=Kb~Y z!BgI_hmZTvh;*MFv%=lejG3)}7K)=xz`))nB9j(BVYL>2g+nCy7|DMQa}mPx0)`(Z z4Y{qfS=wtO^{aHqx3(nb#)fLtxxM8lhQdIK)|>L+s(OJ)m+xH5%PlyV#NMeqsK>(#V64`}gH?A}72-#KIDZ)t1!j`_Xec!Sjg3CH@osK?tI7p!5hV zoVSfC&9tA+dr5oTxp>E#M8yLQ$9h_SQPk)}a9s`jUCqD#o;hU2)yUjU|NX@~Z9L~z zh;ANhApd3|4$T3)xG_PQv%;Bguw95>yxyIzT)zH9!Az)lJ(7ue5>a0EqgqIZ=0y3M 
zPH0EV>{weX-)~)PsTvYD8+tYWNsqfir21fT^nt_v&Y95RR8;?n_ByAdLYXG$-6$cT zZ8cdMyXwa%*{(t3CzAKXG<_7OKCc|!%ka7?O(8^-Qa${9#D6uJdZ5YN;+8nNDt*h? zKpb^;;QXsytpn}bto7TSt9I1{JjVxLG8hh^s|Bt%(%z1D*;zDg zfkM=$n5yHdHwiaJ;H!uO6C~q_m7<@U-%PqnZ33h z%GWF|Ijk2$^wn?KL}eYEC}OYH`;IuUwviT$l=I4LPX0#y(?;9#Eu4m5P{(VX^B>wy zsM&q~f&NiFKQ;m@;4!EYIUo&QH2|}g?u=zM{q^&Q3TVeZ$4ai4GF`og?K)}$ojd3i zINf(l@PGBB2dKhcmv?B=a|IyW&<5rT*=* zM4zJ~Q_zYaN=kqOS`K6dk-9HN3=|uP#Phz$P<@2}-8ZM}WCN}fWEr&pmeyPm0x;~U z|J3`c=W9s6LcbYYP{OKHB?XY* z5tL(yPMax?RcFNAAVUJH-*)|ubq$pK5`qLN`xfTudrwd@5pJlw0LWHi;(iK<%`U}Io^a8k}4Ze@WYN(OZ8 zJ4>CN0B~mqhy<5<0c*#ouH$=rTKG<~fNQWzd0&NjFZm$Pczx-4Nad71dxT~H17c&w z-(md4ajU_V^J90zEm6u?Q8pijvtlgnk^ZIC9pg~Gr&gmy8__-h00v>Tk#{R!?c?nc zsL{#6hDwdY(&sn$Xg>llL)9M?LT0~s zZic$9k5K_If-BQ->_4w0{HHyF{2)G-5A;1ja0m@2n~qi`vTO@Exgw!Zo1l+UIuXccuvz`1au^}Klk}6r{RT{P(n`VXPpM+0m&2c_8};^ zBl#Sc+dv;d&1O!2=^04KVtAO?k*)8DX&Nui>jW=OcY8{mLH`1fqLq4f^-s|V7_P_j zFU$!KGr=Ybb!XfNl8nIWnry8|d97EJ^1N(72MkgG_6 z=!IZD55SJJ1fRnTOEvz>Drw>15r>8E@1NA^4i)NXl6(CWvm7l--dpV#N1%{3fa1Zn z?YeQbX;5D>;*CTWU}yoJi)%^;4>$Jj_feAjm&7g__#8cPdL5!ss4ZQnU755o>$xRl zo^XYBUPFD!hDS&%axiARtUTnwunP8fvhmhzV`Kfr`N@AXS(&I-xS;R3*Y235D{~q7 zsk){foubgbS~wZ+qrZkl>R%$<4kA?4be*dJ_D0ZduZRJF{cReTE-4sMN=eka%=|UM z*R7@P26NsE9!@z19Astc{DtP>F|r{WitW6;MI)EG=yP@)zc34+ORuA)I8K*!ExwcW z@{ka$)+=;csSEAP%SF)cs4Vv+GlMW3MaZfx_4UooZ2Q>;l|pT9kkgzVCcjbsCwr%rnU5l;S2&hc+C6&<ct$06b>3Chd_9P!K*cK_LF$k~#4IG9)F|YqWM6$vf8P+C2;B6v zjV7#(KvS0rr1)5cWd8Vm)?gCMR?r^&F#))#02inK$1M7dyObCdUL_AL2P6_6+1_VJ zc8aX9=>Jj^&#pPX+!b$J8~WM)=@1D);7tp}avtS~=w|}h zJx<$+hadH!wQ?Ugnjx$s{~aR-{$p*u{YgDPOYNgd4q6Rx>SZx%Y(kcB0LLt^i-A3B z@bul?TQ3S$o`eueA8t-(KMuH#5DUP@%X3#3YBv5u4FtDKR~$|h_O9jQw|z3Qi6JJ1 z5Gy>gosa@W*mbg=SXP}Zv+jhRQ$Ui&voRdJ)9T#VcYAUb=R$!Q>z!aKfPsH?gOpbZ zfSgvxD^`t*-`|rY#v&l-m8l5pi-lg{6FG7KDZe$H1+^+M{)E03D9jKthBFPL|54Y0 zZ4Z)Fw?FWDW}Bo%o*B2Vd#XK^Y(x{O9tbYPyD-FxV3cGRhkw-I0)@(DBBY%-4CMwUqzauuRV-KAb@ZtC=PW(mTwb8FP{CL~1 z{3$xZS7G)EN?}3=S~Y++T>=!2w3DYnaq+Yy;Cc0TrEI+ms?G;Z8rPgaGW$}iX*1iP 
zH#IF|#5(HzSLl05_w40j^D7Klz#uQaeBmx$3DerwXd1)^nZ{^euKo-l40a#NzZOlf^@|Mf@~Pp-O&OF%{|cu04Z^rb=`#x z=f^z+tpW!Ue{i6%XtwLtj2R=ZH%m1Rih1t;Tq$zwMQ`!CG`HC z!kg>rD~L3ELSf zHC_TL5gZm|j4O`S&rb&t`+=0m6mTC1R@1USBLFyo5~4Z;oKRGMres~U@xA~0g?;If zOcqF47J_u(G-(TOB7gYzOGO`oG~o{7WriLzKp`=hbgBv-S`A5Q1IGichr>=ggDh+y zJ98t%^ly_t216m|bI9rfjVGX_43DXO!JRumiPe^;oM{4*SH{*~tBqJA7O5ftU>6uP z&g@Cs0R~`I)KHJo+6taB`Nz~cYz240C&1X9{X4T zv$g?9l|??`VFYNJI;0trO`g{AuH|LoU7{lMgi^~?I4`!62yV5diH_kt2MMmE9>n-E z04Tfv2q4M=-BFowD-NRLtpF;KS1r#*|C;76Ny-8`Nb6I5faDJq8_ZTvS~GM0ks5%C z*MFWKIB2TRF^tpu*vUApu+81wy`}rr!d0jh6$L3iTQ;r(2!2fFn*2GPR+YG(mL162 zO?@u4gcRx4aGLz}MO1x6HU`=nzB#=*mw@+Cr{78JKlpSoYZl!Q#KgFQfc*ThA}h1q zT7n5UjlN-WJ#xs`DC}ZhpmltCwWIDxUc;wt1mJZ~skQYEC|mB+$wYJT{DftG1WKU9 z!u zUR>MNvf9wt?w6#9FEsFg(e3hDZA5vkB)iId#V1MFjsz)S?Au`v;a^L#n1-k}>++uf zSOXDfm!BVOOfn$!dI$~+12>HNuc?NA262Vdau?8=GJqbUW;G)J%YFUta7?ozyPJ^S z`Rk%IARrmATFtIL*-QY4Zo6rRzZ~QL2RFSv(psA%=a)o8q{Wn`VDKk`+U=!u6Hrqa({)W?ZWxM zzjK*)rd$>I4r(ABNHT{SU0K&e0bFvdS|`4YaFmj0$g>RYAZqS_c-iqK0Q*UtqfdFUx%tr7SOv4BfKfj(3_#pl>E&;*1? 
z70|K8b6dyCflmCBW-%K+z1&;RnsVyDl9B%{MnW-%Fmj$=58a9=Gvn1ERZ4B8ZI-~XV2*|K{8G&m&)!cueW-WW|oS5&+@r+?56}) z(=N_5r1qfYnW*%jjErrXXfIAE^a{S8K6+$bGobuN?I5C~%5GHcAgZHklap3p!}(m> zP9&NNB68^BK}s;ib?#}C`{aY>vsm^U^+g!PyQBQ=Cd9nB*HiW*X3fiRs8_VyibhR; z^7|5O*WM`Iv7f9eypu@j4su^&9f1O6YYg zOwgyixaY2|-+T`WR#yC7zc}{0gBq~*28a@sk=>ln$zS{8th^kx2xZNO3d-hk9a!c zS@+V)O5%^%hrBaDW7Cz$qsZ^HI#)nw{omaO)5|pkq&Rh-(``xENTH5aw&kpeJZQio z0J|BF@os&(P7@HCYzTm&-0nY!n8bE~LikQ)*<}?6|E8_K_=e>Q;*5r2KpL}~6lJrNYDc@)2&{Wj_>>X%>OEBcHW$z6q}bHZy8eu*TD0mFzUz$K zi4J93uLIz64r?SY^^pE=k;QP7Xvm+(n%|~1Cr(<&Yg510@XoPvnbEZfyDvBq&MkH1 zsP0EK=KwD8Tz%*};M;ocvI_imD=Bq9jUhF(kkNx1vXJ_wdYAf{wR5+BC^P*CFGqjXK<}`!$AabK(__VpQoRlX) zdsOe7kZ<2b&HBRk?%i*o(54ZHps#!OlAyk%#Dm$3`2AF~F<3mn3$H75tTY(d$G?2i+q`H5z+F*r%G_(gxU$BK0P zyu-+kjQRE_Mo{_h<^S(a8m{awRrr~>J3N_40Kp(>F4edU3`6~F*d-Yw zrYuM_w>#%fpo9ddk#YdACy0&fts%GAmh7l~!<19$H`e)Q_`zWIY!h$jFnN)EKG&}Y za`ICCtD3}ne$8$Q+YQap?P{k40YZSm?)zWm(>1^f;{0#j#8La?M!6CrEr0?MX9{HX z^;0Tr##P?nQm}z~I<%B7jRm({ zjj@JBG@LZ^0HBPt0O^baN>?e6hO^CkQ-I_k_^wXr-K(XH13}1mXyy-|qJ2qf{jL5? 
z0p@$hKMeqqXh2;-@+E?9m)TA}S`qrIE@;LF9E%(_+E&Sct3+((I2Np^4CR#sCN^C{j2~KGJoSeS?5~((0)c2z$nJ#EVqqS39l-%#XMkx zPz}IwT%17w&rvl!!CfHZ%1QppfS$ z7WVpl7?6(IqV3m+eV*r24F3T%a}RKZc2&evSzmm;$e>qHQ3YXuEf&NX&Ng^E0NSz>0ofvS#$8E#TyRrL@ms|gY^ybH zovhU#cGCc8zEjWJMial1KQtSEDm>e%`vk@Ul{$Mr)(GtofJr1`S zeU4Lq4mY@n1PoC-P*Z(C$(A>`wf{BOpdQF2WshAFzNHjF53~VvWWSi=xfzkbZ7l;v zVhYMc>2;>VN87SwZmhXs*`ss6z`!1?h~+9IBPd@8`v`&yL7*x}{Vv~$&80wLYXTs8 z2x}mE8-?#_=_%0F#g_L9Pu2?qs0G6MN;CclwmY{!8zA-yJ>GE}piGZM$SFM6OAFAg ze2(-FYRUJKP7&nf(MoC@z}?OEGtfUcnla@`8F&04mZAn*g7S^T0aKBV`jTGh#ToZ z`WV;)gdE|{9|iV+PS(8s2I!NolR0)hP|yc5S6nin>P!bBG3Et#$qvfN{kcH?w~J}} zMwLmgflCl+H+Z>csSiBrJ^qUS2{hzw$24jQe!_Oh-Gsdo1hl-Nz@(wY3;}kl+Pe4gpZ` z0lH`j@Oo1A=r7jgb@~x!|VPffPP%Zs!TjKqgB&GE37hyWz6dm;TCv=p90PnG&--YU53Ko%}6P zYM|7aUiq18`ZMA^dw-w9+TK^7SP$|sFr#tbhLXZQm+9d~bfjsvC3j2bmz+bb0;tJ6Brm(@> zu=-!~Av-|jNubMJdWe?!eC9;4srDWI@nPcyS;7P3H%v)YL#xGAPSsIO?CnJ z@){hsBDn`zQ7(3Td<{w?pY*kc{juR(u-U2deI4=+UQ~)}G({fR6tdq{rPw4h&nJJ_ zD|O_lj1ke1zBHJB--C`SQCcKqBGQ@xE}3cBQZ;M7px&8QIPO6wt|MDPr`Y@CEO%ud z#I2NmBa3r{sIDes>A-jGWA0;64sq@d4u7&g`+!T?|He|Mo*vKyj?4#zOE0OW{aq4Pwr<$hc>k8G#4vM-U#*35<%IoBCf zSB-cNsazSie`l0a>)NNIQ&`0?TFa^JfX1-Xxoxn<8*p@QDk&2jjo{WC-MbI>KAa}{ zq!0CwiZkWcLrAOzegd5Ed07JDn4{)QCAZ(Au|MMEpWiQ_Bf(}~iqX`!d^uhc&(u)5 zIfM&gb)-7u9D4Ta*FqRBq*10@zmV{C`0z_x)bB28194x}A-2pzbk2u0-d#X_Z=hueQ^j^g{(L9At1XP9JczOj~(4L8`)>T6MH3U?V=vt|{oi zwV5ro!EDEqZ-&!?k5d`9bZ4|TpeR$i&dBolCq2|c;RQ2&{__wGbX&o`M$L(vgUNdW zDht7K40Z7L&c)$TG3Rdwx5Nv>Oz_1V${Z}G6nul~bfg~s9PmwuWz)0LX2mu;1Ukb+ z1F7Ry8nX7M}4wa#KT9ev?z;MEDp0^W9KnpsVneI$^T**z1>+ zPi<1`Oq+z`wt%xhymbQE~NiYrUgXa9cwFj{kVUZj26;XM$<;EBMz`9D zw2nb=4n?%HQI1-uTSuDlz#3Wortmm8Duu_aBz=F&WKA?|&`REQXHXe^Nc4m$=WjFD z!YLCV6%JZF-9ZO+i6XuY<=AAlwDBHN%YuD!g(RjDt!Tp=s`5dx&DUq-43zTsyV|u6 z)4d-@$)k1R!B5(14g2Z*{33?>oSIV4>6F$&z1c&m(z zdmT@x3&qRO&&J&M-k#_)k-cwQOEB+|JoK4Qagkh+GnW+gwS<{sL+@n7UE9;l6@ame zz`s3eY_J;sM8LmMF_`3|xw*gRkbhn~Dx36?enUL3a9~s8qm*~&-oU5O`-V4pOpi${ z$DK$HUD{*mzjRI-=297#XQRzcqg 
z%w=`;P|y~s>aO|FwmoSYs_{l^R?;J=4jiPTf|3?{EAPuFW@K(jtZjWV9hA);=`(V9 z(4FsdXaO%up-C9nhe&1Lh(8uweL(LaBEx^dmS<0G+Si+!5;(!GzBSqB_@&|grxO|a zmJ?U1sM*f#*k*69k2z$0;Fn}75WfWgD?oTUv+T(8cZM|YS#&>hJOtWB*V4$8(A*of zj@ECr2bZ31CHCF=Si!B+85^3FQ@Z&+f%k<`9w6`y4`pDh%h_3LAk1af(hK2+kG~ zLZhXK_e@4fmiNLk4_b%PYi>)!@=%}!S>6P#?@5RT8kAsCcCWO~Izem;T1>Sf`^>&V zR3TeI>VdOblNSf&_6z38UJ&f7e<)fDO8ABDM>bHW=pUezoO#8uQPek3N&FQ~x|j7pH4vr!e3SJ7&pJd7~zRyJ=jWJDrN+ znbKgE(U|Xfu?dstOuH6w5HNhAKJX%5MCJU+neBjs`LXACRgea{XR;HN-YUGvYx+dG z;s!W$opf|!A*4%NP#5i)+Aaq_6fN+~+$+y}@9J`~ZnE>jUOaq#r0q+E%l%O#Ro`1R zQB|+ketjKLoti3_hkvMwgvoQ>)3R=LJIup3WR`SS6uqi-1goW>OE&Fe#&Yp};ToSQ&ydhqWH2GL>&N#`_o2CK9Kk9|>JCD;;;kfKwXTXL%04*!Kk+-_!a7|iAQr27l<&%2{A zU1sr5vyy0G2D3VfE1a666-)gd-<;DEAM7uiZX`T%-PARAgc|OK`JQI(3J(X_#8LJ4 z8aFQtDhKYN)4|W*y$qT6*>Wb1>=u#oP2_m%RlDZu|DD0ZSt&4Lkm89)W)<1V>RS~L z@-b=OdN?ERz09%Z3Mk94=F3Gjw5T!tlQEcE&dB?l=}`*&?RzVO1}`Ea(aSwreKMG| zl`>9(hkH55ygN^o-egRbDPttKmZ8Je43?eGt?+u6PBVI1o%k&HsIzU7F(P)VR+dzV zBhQ4(!Yb$T(XqDoYKDvOCN|_Qc&*@d=N7JE9?H)fgX(+Dz)gaLg5+oY1xB zRKusQepX|>dTH+T%ahD^hTKPkWNrP$k9~bIc~~tjmEdUG7G@+(orzP?f%$;!+X?;* z@jMujezS>4nm9YpYIcBxilA5!v@zqDIA@}|3)i$H?7@I_4qS9sE>JA1tW}^Fe!Ag6 zKaic4x7$CXS!+=Kg8bWkN46NM_^r&0P!SaoNxkYk;c5@uKz&G`C)aJ5r@q=bzeu9# z&#x=}aLz263*C#!*J2fli@vdzWI8m?A4tx3c6a5mOMTAwF$$~hA#Z%*^NOq>RJvN9ET1?0Ruauc6t#Bsz+c{~kI0Sq zFDK)?GC5A2Yz0Jz3*YSx6+D=iTuU>Xq;he*Fm7R3X2P$U-om<`+sOz2 zWn@1TLuZWBrsXTqASa~O&kAk^c?^r%%Z+4dCOtGOla!TSPD%R0*P0?8PCChGY(3^} z@erkM%N_`My4r4z{3@1SNwiRk4Dxy7!zeE37yk*Rd(s-Ec-QrWVIZ3leKF;Bqv-l8TaUu3WyNxOBwZoY^5#2WC&Fg0=TN9P}mW)FqCIYbM_WYls_GwiIGYV+Nvv!411ybD%up$>? 
zI{G|3zGMv?@@SXN-;~MNpcys=t5jgz1`IOu+aQy(xO&&8mR=~D9}7NTQ6F+*i=!g* zW>wwr+zjNrqh5XBb!iU0f6VbkG!@TEh<@V6M$Qpj-tAaT-kUc(`C+&n5PP_LqzUzQ zYQ8jK-!BiZrg(W9%mRBzx%}ru6a3VcRK$s?r1Oil2imGNabqrpaM*7utFo#dVQ%wD zQ4E?s=9RC3xURx1{O{6T1e`^rY@1)AIrf)e@OC8jrb0#(Z^V+Xn4_vn@{YQ`L z{7?I`oAk(Y1{sv5PPD%>!vcFVfAe>R%*CB^&gbzD`xurjJ70j}TpTC@oyb{5+a=;1i&NRNazRVQj#^>;@}(NtR|mt_+ZL_$aRCObqb1DEPIrrbXk8rLj6Hz zCEXKk`pnhKZ}9x{4URr@fmM8}%GpvJxip>yfE@J*;y>02tn4j45ULsk2xWKXNUmb4 zUGGjevsjt^Cu&EuZE>afKu!|{-UcC0TQJaVrc#X}>^rRuvF$_gm)4p2$2wo;Dh3s> z;VS8@%lGcWW9a4a85AD+my_2eJ1qt~y<0ckV%IBnuce{-N1YT-@9)?vq!$3?RqqTG2S}E)C&w#2clK`>$b&-gW1Sr@i24zd}a!LT8*Y(GizvKuTa|8 zED^ddyyyAm;gE7zPT5|a7`X$`{%2X_hxIuM8a7fbwx@6)cN`Zj2J-)yO+ zk}M#kA?AIarVGwvs$@t66_CaQ`C1GL9X_hyZLJ!I>~|C*`UTtl*`OI-=F+cqva}rj z6r+`?)h3QBv>0vk<4eHu$ul5ja9B+b4E7Pm75??ClpJrqV4*eW)u|Ta?~hW3*CB&j zxPI&Tp5$I2PS1kh-zg)c%ufPetK*hxzl8^ZdQ<{zilGcBIv$hiRNKXX6VV?;55Xx0 z!n1P*YVYK~rcYwC4 zP8s6;9`R!m0{~S4jA^1)uU|Mo$Ag{AQ))7@{41(>S5z|S_~NWW$`FDzK5C_G83IKV z6Vn}WVu!3+MtbC@b3nIr2OiN+|M?8a2)^C>`1ts6%l@S)Pec{R=qL5g3PK$k%{GMc z$Z@$V0-PEC;9Quy+75m(386y7zgf1@o0{r)t&E(VwS80D{8E9%_y*a)dp~CNrn72} zTm%jo|DD@oK%5U~0u99PWR!mAjwKUt`Jk~#CI05^{*WM~&ZW?zsowA_IuoSKBz?p2 z1v+bcjh1!vrQ$&8(oAI-o25NZL2Pa6<5eDlZ) zET`E}oC3-P#E-{xJ-460JpzzpI8fiZ1F`ZqkBpSlV|cN{kg{aFaJ$F4?!n+GS6&JVX$FB@n=yd7#*`vI`xxJ0oG>l;$X>f33eKC1XU|d-r z%$*7ox`7j#C^71wuox&L`84%8Y=#Cn;j96i*6iNuhO}M72sFV`_ based on LLVM. - -* **User Guides** - - Those new to the LLVM system should first visit the :ref:`userguides`. - - NOTE: If you are a user who is only interested in using LLVM-based - compilers, you should look into `Clang `_ or - `DragonEgg `_ instead. The documentation here is - intended for users who have a need to work with the intermediate LLVM - representation. - -* **API Clients** - - Developers of applications which use LLVM as a library should visit the - :ref:`programming`. - -* **Subsystems** - - API clients and LLVM developers may be interested in the - :ref:`subsystems` documentation. 
- -* **Development Process** - - Additional documentation on the LLVM project can be found at - :ref:`development_process`. - -* **Mailing Lists** - - For more information, consider consulting the LLVM :ref:`mailing_lists`. +Several introductory papers and presentations. .. toctree:: - :maxdepth: 2 + :hidden: + + LangRef + +:doc:`LangRef` + Defines the LLVM intermediate representation. + +`Introduction to the LLVM Compiler`__ + Presentation providing a users introduction to LLVM. + + .. __: http://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html + +`Intro to LLVM`__ + Book chapter providing a compiler hacker's introduction to LLVM. + + .. __: http://www.aosabook.org/en/llvm.html + + +`LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation`__ + Design overview. + + .. __: http://llvm.org/pubs/2004-01-30-CGO-LLVM.html + +`LLVM: An Infrastructure for Multi-Stage Optimization`__ + More details (quite old now). + + .. __: http://llvm.org/pubs/2002-12-LattnerMSThesis.html + +`Publications mentioning LLVM `_ + .. + +User Guides +=========== + +For those new to the LLVM system. + +NOTE: If you are a user who is only interested in using LLVM-based +compilers, you should look into `Clang `_ or +`DragonEgg `_ instead. The documentation here is +intended for users who have a need to work with the intermediate LLVM +representation. + +.. toctree:: + :hidden: + + CMake + HowToBuildOnARM + CommandGuide/index + GettingStarted + GettingStartedVS + FAQ + Lexicon + HowToAddABuilder + yaml2obj + HowToSubmitABug + SphinxQuickstartTemplate + Phabricator + TestingGuide + tutorial/index + ReleaseNotes + Passes + YamlIO + GetElementPtr + +:doc:`GettingStarted` + Discusses how to get up and running quickly with the LLVM infrastructure. + Everything from unpacking and compilation of the distribution to execution + of some tools. + +:doc:`CMake` + An addendum to the main Getting Started guide for those using the `CMake + build system `_. 
+ +:doc:`HowToBuildOnARM` + Notes on building and testing LLVM/Clang on ARM. + +:doc:`GettingStartedVS` + An addendum to the main Getting Started guide for those using Visual Studio + on Windows. + +:doc:`tutorial/index` + Tutorials about using LLVM. Includes a tutorial about making a custom + language with LLVM. + +:doc:`LLVM Command Guide ` + A reference manual for the LLVM command line utilities ("man" pages for LLVM + tools). + +:doc:`Passes` + A list of optimizations and analyses implemented in LLVM. + +:doc:`FAQ` + A list of common questions and problems and their solutions. + +:doc:`Release notes for the current release ` + This describes new features, known bugs, and other limitations. + +:doc:`HowToSubmitABug` + Instructions for properly submitting information about any bugs you run into + in the LLVM system. + +:doc:`SphinxQuickstartTemplate` + A template + tutorial for writing new Sphinx documentation. It is meant + to be read in source form. + +:doc:`LLVM Testing Infrastructure Guide ` + A reference manual for using the LLVM testing infrastructure. + +`How to build the C, C++, ObjC, and ObjC++ front end`__ + Instructions for building the clang front-end from source. + + .. __: http://clang.llvm.org/get_started.html + +:doc:`Lexicon` + Definition of acronyms, terms and concepts used in LLVM. + +:doc:`HowToAddABuilder` + Instructions for adding new builder to LLVM buildbot master. + +:doc:`YamlIO` + A reference guide for using LLVM's YAML I/O library. + +:doc:`GetElementPtr` + Answers to some very frequent questions about LLVM's most frequently + misunderstood instruction. + +Programming Documentation +========================= + +For developers of applications which use LLVM as a library. + +.. 
toctree:: + :hidden: + + Atomics + CodingStandards + CommandLine + CompilerWriterInfo + ExtendingLLVM + HowToSetUpLLVMStyleRTTI + ProgrammersManual + +:doc:`LLVM Language Reference Manual ` + Defines the LLVM intermediate representation and the assembly form of the + different nodes. + +:doc:`Atomics` + Information about LLVM's concurrency model. + +:doc:`ProgrammersManual` + Introduction to the general layout of the LLVM sourcebase, important classes + and APIs, and some tips & tricks. + +:doc:`CommandLine` + Provides information on using the command line parsing library. + +:doc:`CodingStandards` + Details the LLVM coding standards and provides useful information on writing + efficient C++ code. + +:doc:`HowToSetUpLLVMStyleRTTI` + How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your + class hierarchy. + +:doc:`ExtendingLLVM` + Look here to see how to add instructions and intrinsics to LLVM. + +`Doxygen generated documentation `_ + (`classes `_) + (`tarball `_) + +`ViewVC Repository Browser `_ + .. + +:doc:`CompilerWriterInfo` + A list of helpful links for compiler writers. + +Subsystem Documentation +======================= + +For API clients and LLVM developers. + +.. toctree:: + :hidden: + + AliasAnalysis + BitCodeFormat + BranchWeightMetadata + Bugpoint + CodeGenerator + ExceptionHandling + LinkTimeOptimization + SegmentedStacks + TableGenFundamentals + DebuggingJITedCode + GoldPlugin + MarkedUpDisassembly + SystemLibrary + SourceLevelDebugging + Vectorizers + WritingAnLLVMBackend + GarbageCollection + WritingAnLLVMPass + TableGen/LangRef + HowToUseAttributes + NVPTXUsage + +:doc:`WritingAnLLVMPass` + Information on how to write LLVM transformations and analyses. + +:doc:`WritingAnLLVMBackend` + Information on how to write LLVM backends for machine targets. + +:doc:`CodeGenerator` + The design and implementation of the LLVM code generator. 
Useful if you are + working on retargetting LLVM to a new architecture, designing a new codegen + pass, or enhancing existing components. + +:doc:`TableGenFundamentals` + Describes the TableGen tool, which is used heavily by the LLVM code + generator. + +:doc:`AliasAnalysis` + Information on how to write a new alias analysis implementation or how to + use existing analyses. + +:doc:`GarbageCollection` + The interfaces source-language compilers should use for compiling GC'd + programs. + +:doc:`Source Level Debugging with LLVM ` + This document describes the design and philosophy behind the LLVM + source-level debugger. + +:doc:`Vectorizers` + This document describes the current status of vectorization in LLVM. + +:doc:`ExceptionHandling` + This document describes the design and implementation of exception handling + in LLVM. + +:doc:`Bugpoint` + Automatic bug finder and test-case reducer description and usage + information. + +:doc:`BitCodeFormat` + This describes the file format and encoding used for LLVM "bc" files. + +:doc:`System Library ` + This document describes the LLVM System Library (``lib/System``) and + how to keep LLVM source code portable + +:doc:`LinkTimeOptimization` + This document describes the interface between LLVM intermodular optimizer + and the linker and its design + +:doc:`GoldPlugin` + How to build your programs with link-time optimization on Linux. + +:doc:`DebuggingJITedCode` + How to debug JITed code with GDB. + +:doc:`BranchWeightMetadata` + Provides information about Branch Prediction Information. + +:doc:`SegmentedStacks` + This document describes segmented stacks and how they are used in LLVM. + +:doc:`MarkedUpDisassembly` + This document describes the optional rich disassembly output syntax. + +:doc:`HowToUseAttributes` + Answers some questions about the new Attributes infrastructure. + +:doc:`NVPTXUsage` + This document describes using the NVPTX back-end to compile GPU kernels. 
+ + +Development Process Documentation +================================= + +Information about LLVM's development process. + +.. toctree:: + :hidden: + + DeveloperPolicy + MakefileGuide + Projects + LLVMBuild + HowToReleaseLLVM + Packaging + +:doc:`DeveloperPolicy` + The LLVM project's policy towards developers and their contributions. + +:doc:`Projects` + How-to guide and templates for new projects that *use* the LLVM + infrastructure. The templates (directory organization, Makefiles, and test + tree) allow the project code to be located outside (or inside) the ``llvm/`` + tree, while using LLVM header files and libraries. + +:doc:`LLVMBuild` + Describes the LLVMBuild organization and files used by LLVM to specify + component descriptions. + +:doc:`MakefileGuide` + Describes how the LLVM makefiles work and how to use them. + +:doc:`HowToReleaseLLVM` + This is a guide to preparing LLVM releases. Most developers can ignore it. + +:doc:`Packaging` + Advice on packaging LLVM into a distribution. + +Community +========= + +LLVM has a thriving community of friendly and helpful developers. +The two primary communication mechanisms in the LLVM community are mailing +lists and IRC. + +Mailing Lists +------------- + +If you can't find what you need in these docs, try consulting the mailing +lists. + +`Developer's List (llvmdev)`__ + This list is for people who want to be included in technical discussions of + LLVM. People post to this list when they have questions about writing code + for or using the LLVM tools. It is relatively low volume. + + .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev + +`Commits Archive (llvm-commits)`__ + This list contains all commit messages that are made when LLVM developers + commit code changes to the repository. It also serves as a forum for + patch review (i.e. send patches here). It is useful for those who want to + stay on the bleeding edge of LLVM development. This list is very high + volume. + + .. 
__: http://lists.cs.uiuc.edu/pipermail/llvm-commits/ + +`Bugs & Patches Archive (llvmbugs)`__ + This list gets emailed every time a bug is opened and closed. It is + higher volume than the LLVMdev list. + + .. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/ + +`Test Results Archive (llvm-testresults)`__ + A message is automatically sent to this list by every active nightly tester + when it completes. As such, this list gets email several times each day, + making it a high volume list. + + .. __: http://lists.cs.uiuc.edu/pipermail/llvm-testresults/ + +`LLVM Announcements List (llvm-announce)`__ + This is a low volume list that provides important announcements regarding + LLVM. It gets email about once a month. + + .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce + +IRC +--- + +Users and developers of the LLVM project (including subprojects such as Clang) +can be found in #llvm on `irc.oftc.net `_. + +This channel has several bots. + +* Buildbot reporters + + * llvmbb - Bot for the main LLVM buildbot master. + http://lab.llvm.org:8011/console + * bb-chapuni - An individually run buildbot master. http://bb.pgr.jp/console + * smooshlab - Apple's internal buildbot master. + +* robot - Bugzilla linker. %bug + +* clang-bot - A `geordi `_ instance running + near-trunk clang instead of gcc. 
+ - design_and_overview - userguides - programming - subsystems - development_process - mailing_lists - Indices and tables ================== diff --git a/docs/linpack-pc.png b/docs/linpack-pc.png new file mode 100644 index 0000000000000000000000000000000000000000..bbbee7d67ef9973c7bd6f99a39c3e1f75794420d GIT binary patch literal 13578 zcmc(G1yqz<+cqsBiXb2$GLq8L(jXw+og+vO4Bc%L5>f&Z(n!kCNF0ln6r>U978qiP z`Sy(GocFx{_kU}B|G)k?Yq{3U@I3q3`+oMm?(4el`$Xz!DU*}Zlj7jukgKXF>fzwv z)`EXGh>0-2ctr2u;1IAnDJbZuDkwm7Jl*V_TD08lM{<*lCm_Th7cp__#>sOTD%E8eUyBEP#=C>|G^kV+_Ramb*^>5(iRv!H5kdP z-43T_452%^sM|XNt5d()cH!j6sh((R@A}TLC;?t6UTVl|O8>zQ4r#5MvLvwsTc!L^7qAreM1srLb)`%>c%j5MTz`GMY^dj*_mLPq*Yue zr)zWaha{$+OHICS=z3_>z6`Fihn3q0I$W9fPS~79I}x8fJ+$FXbS>b!?0X6RC8Fz- zmLqyLb$RW|D~gINS~R>i-%8XZ^L4TWlRmZF6P02U?0o#>V~@1}v-S|B%XENv+|Q23 zG1as!yt%psk{y-qZC&q7Wc9~!Pnm2G8@Cx8sPe1CzGrL@tN$VK$0b5iPgewX>IrbqR~oj*;lQs>@h(H;;GHs7RzT%dCF5gU>zg27}r zE=hG0H6MN5Y=yzRPd)ZtOh>zRh*rVB)O|SmUipsq)Kw2p@_t}c%A4*T-w!1IER50? zxWjDZ>uz(V0~2}^@A3-Pjw1$G*#D62l<6I6{4wzMJomf=UJNa!9hsGMeoQA>$Qr^G z!hLotgd>FiOe4zMw2AqQ)iSv?fsatUdAlW>?R!uq)S7BK%F^*0EL(&_U#mN4fp#(J z^!;hI6hqb3M(B;iqJ@^74U&Vti)V~2*Stv)&YHDByuS#p314O-$|M^5V(|HKsW*-3 zWOibqo@nQn@B1u8z80?+Q=mXMfp z<@-yl_Ve0>FCn0n#hkb%!+RJb_lude|_`Q~I`;%%a-A@RV?mCxGN&Bl(;-_;#lje{fQ zF9ANf+Id?;{9Rq(UK0M&%-Aa=z-P?Q{LB#SCEhS;W)n>vh=QA^9YlmrfKQNFh7$h51!N!Ohj()6UBaj4vY~g}w6Me*4#ReqXQS=;rMPqTuQDKo#z7 z=Lv51w#Lj(M)1$O|LZIM$Fa0Mo$SD>v3LLV?0^3D=k-$jn1%l%5Wl00{S|~+hE$6G z?^h;6y6`fN69*lz#n&W`a%Ph;Y9ndL$xdN`NTs>?9twY1OyArCO4w^MBCL2 zP11KD&!f6?^{(Q|U-jXIsF>TyD_N{U$n=scJ}^U+x(Vf@wk{m+W=+#2PG#8M^=uSs zL>S!f8CYC!6l$j zp~rn_HssBA;UVTPaymanu$ROO;}O*6uc}=l0N2Qc=cK4sMkFO&9c*)q9`+nu>)Hse ztPw&m7Bk@zO!3O;c=@3RB*29@5!VPjzv!gora{`r2!wExxvt1TCP{DW^~_gXmM(X2 ztu(LKq8D*yO{HzMh@#=;@|z8*&o3w#FR$(HOA}IlYf!>`ytka%oy^^{oE-*FeMedO zem6=OK4n6B^%lggCnZ^Ot3lxslZ=a91~L^I$|>SJ60gqRV!+pS>gn{o>P;~E>`>kg zPpnWQjXBlAGty~xsV@T`@o?x}jnJqlf%Fvy%=$@J7?n&Y`EzQt{1tjkC(pyJZ@9Ux 
zqYftB>KU9j`x7~|bsDD{y-s|$rkqmuE@bBfuX~(B98$+yLz=heqxrRrD?&~VeCgIb zntq-g&K|DrE%l`K*V}ecHf!7?S@#&6Sj{J=`_?Z5Vgk_#Pe?~wj=fNC^j!SJblVnv zel~xZuP~C^s7z&Sgp``=d8|7PzZK87w}y$cVQ9-jtxV@s2ASXn)yoW`8nKL$>`Wp1 z3FGHm&D$G-!$sPuv!N%N@TOJv!83{KVY?q6p7_#n|N1Jqtuj_&iToJ<{%|JfWqP`^ zosxwz%z?-C`H`Y6bGY0A4RMTnJ{8AYm|%8?YFkx(c89O8ed*98_YiL9r^g4a1CNox6GWOP z+7F)6bh%FkTGaSTI4L~UZr8%BTn?f%d@gn)g@A0Omen%%*V7b4-DO4zR;8$`0zNA% zJz;^ji)BiuT0>6q6(cFxx7#D|*~`a1YrVc>eX#1d4VEOFCyZd3V=8qeB%?`!h8`=$ zUKeM6LPmS()-UCuK$+>hu_3K&8QyaU*JbJbc*|Q=Fr71#%sA3lsy!Fg zf}jnG&AyupX%1Ne;|fnNk{I($pF3{G6p*iSIa~n8Wk(kGVR{FYH1JH_L5>gFI@L;3 zh`3h1IQPdR_ElNrEzJzkAt4i0@fc0 zR%hGVEPTEh=HhyOiJ>z}p-Wp`=|v%gP9w1oi$K1-2iEbB;M7fyCk-;EEIO9;p}f4P zX7{7(vq?8;K5wHp@JeXgOyH|ZujQ|+6S~DZHxX`NI~BgRLJXTG8fA_K9&C*JN{`CG zo`t=9&IxS^^nn>ah5fZrWpE|0fU}E(duGu0k_2xwBcP!cnPe2&u<3qM-oa~9)oxs( z$CizjNY}=X3Hy?Zq(B@%dv<-cXQr2#Opl|extE5WPz+*lVcgYGbGO=_P}FLX44P9e z8+`(A4MeUTpCZ^dr8G_YX&Z)RCxC;2`BYC^C&091ScsDdsJVh$x{JOYspgR6=d5Oi%dL8h4Tj=LPZ^?1qqg7?vmwbNu>xv=wy%0w-U zquFn}uE+LXmZdlecE)3*4HCth-`^>KfUrnK75}VE*`L(ztEZR(RR5jQ5hUWSK7W9sb0$GnFxcp(C$XJDrP$oHyE`eJj3ZtOIcN=&*bTJ)P z5!t9QfPY>u5OyByG~3#;G2JYd=fY|D)}ZO^M|UCjiu*I7nn!MJKg;&TNf<_I$|GER zc*H_t&EbuoE%wM=_AkqxLi%M+%F3W^R^Cr54(wKk3h<*pWsZiYRG+%VuKkMSx6z4S zqFP&BQAOm$Yq1ZnZ)>Hh$oFxxPl)wP?P&~H;7gyIpft97y=OxXyToRLAIt>t$TFUd zmYM3$1g`Uz7*}T1PKv<#wXS-+xX&)Ch(AH4EoF_D(yDujQDS6@(&dkbviEpp&yVuF zR|e$Q#wz*umiiLEW=g7qbZ2+GyXf2VI_VwRGuC@C*RE7^`0G#FoxPwFy0_J|UNMKq zH|=RNpRcjQDeL)2C9J=6*0UrpH%kTbq%Io<79($q6Qn0s`& zn~eWGq|~UqnnA+l`|Uj^HHWY@vMP_CUv`Jo`MYM(=ZCMlzdymJlu+|(0=MxC`fiv6 zoF6Y|%CM^U*riNP`^~z_*xGu#BkcQL&a?z&NG`5~XttrxPhz4PZL+Dd+n=tj>;Z61 z-d*g;V0aOB^y8UtUF@a(buqCS|2(?1wPzwQagX`Ojnr9L)<;J_x32ktHBZXSYd(-@= z?r?LG14UE9jHi+s99h9YuaLp3BYN5_z@iWqSS-aihhH#=O^7uO$Q7rG?}w{r{wa*Z?kWUN(Ofye`u^mo)K$im~wNjW?Uf z+{n=H!C*%|gdf4j*Dt)tk`C(rW(;*jLGpq|*FtDHF~>_(u9S)B`KG%bJKp@Z+koRskoBxSQrzSJfKt-7Zt zv*=4xb~D2J&-BXK%;>mv)+Wqs=n6u>X4I>5G3B&+-+*)d8U6vi~z7oDN; zy7j<2zpxU)ksU0^SRN}?j^1m!V`=26)2j9&w8Ee0|z!!!Z&h8DJE*fk5aK;rC 
zn~VJN3pJCs&Sm)a`?r-_+8hHWn;1oqS=8oJg0xzBlH>d4^8uB8I;=mF|nd zE1r(A_W@pM(m{2NGQa+~!1QWZ;+@p_-IN!|?>_mXHBS0M6F^2K{VZJbQEExa%QHTU zdX(XIcg~J#GjTxG0%QZ>#O8g4Csqu@>(C!GXH~h|oSPH1WL&w4Vf#sWTG!~rJ#7c# zB-RP8w!n)VJky1pDEAwBry#|4h49xt>l{3pXT`LmKA)~iFzA(z>5Bz^Jw^1(2#qmBH=zrayOcWS1@sSAEt<+ucrxV8%oH~q900oMeF=yCXOU! zl%Ks-$|?0HKh&?GDCVMnLrkgc8}?VX3fD&VmapJn4?9MYqL0xw_*UI1SV3>&nAE}8>gRsFDbYVZDQ_C zIQR2=#}}vej>9U-gneA7F;9hkziI~6y=s}ee$rE|NHbs#?P~0_E1ObhuxJ>*KI+xv zR9@dYB>Ciysh-;GQoc!H${@)_2`U3^sMGB(N3($UGhny)53cD9}eslwuo zTXCw_eypAPB+IU}oX!g2jEgh~=#d?+-RVC=b?pSxh(XowWdBqhd9dTn7ikz+YI}Py zye@CE&(r>-V*kQ5YwZ`CPFFk=#iA3;9-gGkC7I|*Wd_+K&vciv%c9+Cd;%v6i;J!g zGx;3Md{p5LI+Rt8!@=Xb`d#OZ`&4;%1&fHi?3c5lH+WbET2BmU_YhSLK89&S;J`VU zdTfC83+8Bm5;6~b=!>OYpMWJ0oe@upof?{fWSqtSqo^GeH0VuzTgffRq%R3XG#8E{};G!}3;Vgy*Y+!XLB z8c0eo3|6k47jXLeeS%Hw5YNrSb7;hFQvW7`O5r(6DOov;*daUs!=Go*|6g5HywDbX zF?dq7#eIb*A3W;cd^xCTSkJ*Ds5T2vFf%&D=?AeRhQ!|nf%$N&gcuv8QbMU9)H|(A z33*>mPR?rQZ(InB&h_kI^dnfTv}&1fVUh_p3EW?~d2+Cs=(*V470usz3-IUIbXo&C zj9Y<&S8;V`Lq+pZ60#_w1b85#J6~QfC_Tzbb|Vz#z5PGY08F%;{d|ge*qGGfWighR z=5lDgjt9Pg_DtxBD=gJzyjlsMn629dc8n z5ogt@TGx!}J6jUPI1#p2c7k<+*30YjO%e8bIdZto+I6mz><;}|$-rY#2k=q=KI^^r z_eji6NwH7;mM{Szc%wt98$pIu;uwul^eCOE0$4m5`jHVIChOf&WkOIYiR_xTK!xuD zhoc+BMl%5zFls>QYe;BkNqM67olf2)>~7q>M5&(2=c5zFOw4*u4(dior9)jgNdRFl zCPfK}c8-h~oZx0%a#l~_5lEDZy2@$86weMO=HNC~Zf^YaBAty}>Mh;7Utf!}0MbZB zZUbiFm*ha}8Ba{$tMi(9-1NTfX%Q0=S$7ln>3cX;_^TYQmHxeF``$Xky6LfBn0Y~_ z5W?-{(tmT8P~3Cj`MBTyiY}+cQ5=8yy>7&fetHn^ov8Lk>(o!vBhgu?pDspI$(G@y zQj8ek zI-(+4!rMC7ND37h+D=dAhsrP-1NLtVblOm^5?PwKSD`l$;e5k6^7!LTKBYDAX){r` z86FT!FR(Ob?7^Bdi@2H{=Ha^TcnwdLFTPm&VCrO$k^gEv(!p2iY-KeeUDE1H&F|gv zfBpn%&+2~SeO`kPwohm5EKDt>_2->)?skz!{P$5^LrxfhnM$&MJa6A6@fkM6eIrR*bl5P z0HioIy~LNt-~;eH>9NT0-}F4cz_6$SaRMl%I-Go+7&BiS`4?lDLmn312GOu${r6>4pokGM3a*1kzQ)!Aqu5+x9Ri*-Vs&M`*E=&ce zg?zv5nXa4dx_{1AZY0NdcR`iYv^xLdwcB!8{_`Z9=5=LKM{|#=!O_Z>pZmS;mkJPP zo%P|OIB%fs!A@r`H?L1&QI73=euX7pHIe<_hddZixZG7g8_KO;sF848+*1We3|8X` 
z3k-c{Bggyg{Kj9$6MrBb^R3jl14Yrzf(3PVdCc5_iPhDaz+wl|qUUNsYIp6J6*o|J zF-hFUTMvcfIl8V#kW-U(SbBg=<1r?W0ckmA!rZLT;Dd1oHn*AgyNjvN^f@D7N_mk2 zd`pgWx!`jX`v8i1xsikka3pN^R|fT(yq)&O8ay79nAcZG9WNwS`~RA6V37Nb`oyei zkTsP}{;=cd7&;;Lm1B$$1l9!ELT^Ou;s%Z*_W z(*o$4@XdAk7;6Jj5K}&O$|%4sy9-?`fsC4H?_bBkOF=Mb2r`++v|8~?EF(9As7q&5-QH{L9wjCq zQEVwJtpB(+SzkF)tgGNWR>2F22Y*7KIvg0jIw@7LS*+Q;ne^BI=i&0m!rGtTJkq$(G9h?1)P> zt99;L=t?Yb9L!zfCi%5w8t3o=v$N-{B8ufVqqr+5GI=N)gdGRuz+`9|)*luj52rxV zNcyq2W}5;MS|pKm=DS9WjWl(e1-gCpzCH<&{hJ*t=>kEJ1GU2)uby zG)>^a(-jxx76mkK{S@=b;Wn-G=hWn$gU!jlxa3f<&_c`Y){~0q>u2P5&XjO1y$_aF zhl_rGMQ}s@g1{;kkrodRfUmu^CBQ)IdQ3v45$@iT(3>IF509FugLTH=Jn)UAWLp65 z;3YuXL7Y_Cu=9q!4btnZDSVb|IR>LNoUg)*4nKIX&)v3!J~>dxMeThdxXxP{eMGU^ z|NTr)Uw`w$6LM#X$zM=3DmCr#8!Dy!rV&G9OS`S1uruUX^bA)IFSJR+z7awk$A2jJMj*p>#kN@R(G|dE0!cvi{i1qmp^0r0rp5 zvN|{{eW`88b)vuyDn?DS=$UTyHie?$bHEH7@4f!F_g^Ky`GnSr*crL=p(-Hby@$&{?83$59US1(P2UJ+`ZC1S>*3S${|x?B zv}w_E>olBhu;AJ}h~Jz#=?kjwYfC!Gw%uZuDAC0W2y_GI~X% zE@-=D?DYPMxmydHX1cIPkHBxPj7tzB_dUlb?;iQ_`j?5lWCEw?qe||4*MiBOE{`1mE-uXQ8<7HVMaPSdPRhSY2pvcsCx{uPo zbz@2?`}h8k$V7u`mT2A`?l_U(C<><_megGUP$VoXwl}OTn+9zj-Oe|xuo#aSy%Dq_ zP@-25#W11GfCU|jD#8y$B4?{9)pk8GMbXVy+c46)r;XlXY;IKQJ*iw_p}QQ;0i zb7XBw(8pqB2A7-;8MQS7HW~;Z82Wt~$(1p|A*O~DKZ&aP4T7dX&p0&4KE-Cj$D|Y^ ze?4iZEF7rIpabH>vLDH2ypbo*>-yl={TQ)2s6iLh`WS+qh+^~G>r@>1&G+B^DX_C( z0JDH^P)|I@%);FY=$1X3pYqrp2EPg<%{ya9D$IrL0VEV7U!z}Kz>Wb%k$AHL86|lbNyNIQI#-e2{T&y@$s2b|0YOKnED0neuGND=ucB@1RIQj5%2ox`4!J$O|i(V zrOpho%J=7}^}1akWi;i?TLNoMzyL?TeincoE*P+D%V}0q9CJf@qWh&vfbUI8Y@8~n zcU*LWfSXbPNQqv)%9YiL+R|VEtfJMi^Z(E@e-=4ZfV{E=Cb6$?zEm^CUquww&x#4a zicte+#B~7_{JO>w4g~>wOM1<|uB#wFX9DrUPDpz7ww+JD@5Xn56Az5`V8!lH3+}n` z1hbr#)8oCxL1m^|KiAn-OOQy}n8Hrq3ByJ;WAXX7@-uy2%s&jn9>E<4U@CV zh@=xhmWK;96KQzOFr29GaJ=k2xTbTo+`PbNeKfa-uNhQvu+#th#rhxY|9ma(^&uDR zpl`v;Jb8G)GZZc2dC=EWEOml?e^YGW{iQcwb0YtJBr*QO3gHBnjK)&{*?#w03AO^& zP7aVkY#-0{w_tbRl5G{>w zXdUq})0Mjmrb}X-Pl>q>R}ACD43O2}V$Lt*CV-R2xAjQ=pDZh0=w_`B37!Y3Mz)Of 
zS9-;XI#-6@060T)KYwZSJP(PTJua9O&c&D5&ojWn31~(;N@G_7WE3yXBUbEe>j5s# zY{{Boy%P!$3$k`dB$kM}fo4mvG_b-_>T`oM(TQSncCtM}PvN`Pc27GU)k(dsf8`Dx#sOw1n`Kp65=V@)H>6I99W^P!Ja@$ zc4f<=14;<+vW*F)uzu9sdxAS>*V>K#@-V!x9!7)vzpt;@=5v%F=mQaVp9|M`eMk4L zVd+06h*{qd%-Osio(#@o5AawDz-Qb>)<4Va?TT0aQ8I5=`R`ZYy(1=z>WPL;vqvWa zfnFZ#0c9~>aEbx{Zt4V4D&r9{7ZwpQtY}3!HzG&P8JB_X=oH2+Wh(bsq3X+)g{mcU zQP-F|pog;MSu4h?P=jXXB(JpR?eR9@zCh)&G0_md`}9*`_v3A+e$ zEs+8h1dOR-TH}~-?1_rMsZhp4C*fT@<^GX;_cJY21tg3`;6!7Ml>{d|BU!Wc0S5S%j9XYX#EvoA3chmon&b)W%bX`g7ZS`>?F8naGx)!>IFB+JPeNYMEB(r^~vE=JppG-h|yU!7?iTxqMllq|vr8w72$+l78mJg3{-VBvj8w zvy-_DUxMPV&ZQetj^}8inJ`hkY^82>lb`(VN>eAl@E$cEukN>|qCOS}YHE&w(pt#V zzDyBV%Kql$*w?+VyGa&}o@tw40o@=CU@a}%D7pOe5tviclxGj`YF5U%hVpzAV*<(x zNMtOd09`X|puMQt{m0$rzVUKdx{`6r>=r5sQ#p>nm9XnTQp2V%Whz$L_t9etoi)zi zx`~Db1MwX#_hohFUgWSWg3X}PAXjsKQA}<4sp1+!C4fYhn5%{`7g=NRLrr*31DYjqDWq8m+Pp(v3>Y7U`>9mZ;Ev5N#G%OMsM+{*jCKn3nU zi!IX3un#30a#qThiB58mp7DYOMO}<^;h$?uS=tM0`|MDTFAo$%lQ)8;JQ(fwJs9vQ zB(N7}dS9Oa%b=VGid~?_70dy87WA^6F2Rf)6EZSm*%K<3`^dc7OJ&GsOrbkITT|o9 z=o=-b>vexkrloG_>6fgNYj+=E@!$gq=)Zb>@dW!tibGCk&mwj3J~r361E_Qt-1yBf zUxVD~WqI$fnpR&T~OBL08L%Vp#QBCfDjM3x0ahvzc9vS zqNZzxJpvd=6eHDo*~|CZ-?W47oqS;Dc#c{6bpce_P(BWpL2QGx5f56lx&fd*s%tx5 zVi5P=$pO6-f5g;)GO^RcL?d@Vp(l}!^NHFr39{(pe)$vc2f*weYW8!(_>WgP^%OB4 z8St&EdyuQc9XF)cRP@klseHV~mOjZAUVS3I;*3ttXP~bn0W=PF0ZFhSMF}iBz>RLH zuE|3Z2|x|hk4hNSgCp!f|HXmOYDpWfd0i*)PHn-N)GO9`le4*A(fpNwm7K39g*Q?8 zMu1{gD9UN?=-vzZapdW;EII+mi;Qr)?Newjkj_Jenhd7Zwvppr<94ZCVAk5ruZX^a zHDxnQ(7b_$#H#?x$1GAp7NJ4m z4`NUNbc}s|K@n~k(C;Uv7?K{ayO037I8;HmBb&E58EDkXV80atBuI78AzU07IRWp9 zxM73K#o)y}9s2@^057mwn@OGnOZF1!TY$nT*$1br;B{BO6BIK@@vIv9L|`gDNM}; z)AF&%!MmMzc;CE(Sqt?brhP-!(?76%$7L0yHEJ|ry$eU|5Q`P%Jv zf_$(m((fZzi?Y+FfYqy>R&A$ozxIZZ;{XI>I!pcfL^aBH4BB89K@ln*EM#&f6?98T zYT`vGf}=*srYcQr`>-7tOAEjnvd^b6X|Y-!qI)KLx}z{q`)*(pFs3d53%4)@=*w;c zIT6BcGgJAX0K);SqL+rHMqT)qrS4ax63RR)EMNDgmjg`-d@T23;xph)U3_(*=~9Sg zXhOig_nKFWX5}wJKHyH>9ymf90g0h;Sz3KK?<*SpQaEA&mr 
zO7{)2msYDEbX+PiD0!*sP*a}A(?V1fw>g;iY(?Y_xl7Z^)e!PP`Vk;V><*rs>(@FP z4$2jh*c kRf!Xe!CV#Q2=oQ}rz14p9kX!E|9zmUq@`GK_x{8G0L?YTCjbBd literal 0 HcmV?d00001 diff --git a/docs/mailing_lists.rst b/docs/mailing_lists.rst deleted file mode 100644 index 106f1da48f89..000000000000 --- a/docs/mailing_lists.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. _mailing_lists: - -Mailing Lists -============= - - * `LLVM Announcements List - `_ - - This is a low volume list that provides important announcements regarding - LLVM. It gets email about once a month. - - * `Developer's List `_ - - This list is for people who want to be included in technical discussions of - LLVM. People post to this list when they have questions about writing code - for or using the LLVM tools. It is relatively low volume. - - * `Bugs & Patches Archive `_ - - This list gets emailed every time a bug is opened and closed, and when people - submit patches to be included in LLVM. It is higher volume than the LLVMdev - list. - - * `Commits Archive `_ - - This list contains all commit messages that are made when LLVM developers - commit code changes to the repository. It is useful for those who want to - stay on the bleeding edge of LLVM development. This list is very high volume. - - * `Test Results Archive - `_ - - A message is automatically sent to this list by every active nightly tester - when it completes. As such, this list gets email several times each day, - making it a high volume list. diff --git a/docs/programming.rst b/docs/programming.rst deleted file mode 100644 index c4eec59417e8..000000000000 --- a/docs/programming.rst +++ /dev/null @@ -1,57 +0,0 @@ -.. _programming: - -Programming Documentation -========================= - -.. toctree:: - :hidden: - - Atomics - CodingStandards - CommandLine - CompilerWriterInfo - ExtendingLLVM - HowToSetUpLLVMStyleRTTI - -* `LLVM Language Reference Manual `_ - - Defines the LLVM intermediate representation and the assembly form of the - different nodes. 
- -* :ref:`atomics` - - Information about LLVM's concurrency model. - -* `The LLVM Programmers Manual `_ - - Introduction to the general layout of the LLVM sourcebase, important classes - and APIs, and some tips & tricks. - -* :ref:`commandline` - - Provides information on using the command line parsing library. - -* :ref:`coding_standards` - - Details the LLVM coding standards and provides useful information on writing - efficient C++ code. - -* :doc:`HowToSetUpLLVMStyleRTTI` - - How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your - class hierarchy. - -* :ref:`extending_llvm` - - Look here to see how to add instructions and intrinsics to LLVM. - -* `Doxygen generated documentation `_ - - (`classes `_) - (`tarball `_) - -* `ViewVC Repository Browser `_ - -* :ref:`compiler_writer_info` - - A list of helpful links for compiler writers. diff --git a/docs/subsystems.rst b/docs/subsystems.rst deleted file mode 100644 index 80d0eed66339..000000000000 --- a/docs/subsystems.rst +++ /dev/null @@ -1,106 +0,0 @@ -.. _subsystems: - -Subsystem Documentation -======================= - -.. toctree:: - :hidden: - - AliasAnalysis - BitCodeFormat - BranchWeightMetadata - Bugpoint - CodeGenerator - ExceptionHandling - LinkTimeOptimization - SegmentedStacks - TableGenFundamentals - DebuggingJITedCode - GoldPlugin - MarkedUpDisassembly - -* `Writing an LLVM Pass `_ - - Information on how to write LLVM transformations and analyses. - -* `Writing an LLVM Backend `_ - - Information on how to write LLVM backends for machine targets. - -* :ref:`code_generator` - - The design and implementation of the LLVM code generator. Useful if you are - working on retargetting LLVM to a new architecture, designing a new codegen - pass, or enhancing existing components. - -* :ref:`tablegen` - - Describes the TableGen tool, which is used heavily by the LLVM code - generator. 
- -* :ref:`alias_analysis` - - Information on how to write a new alias analysis implementation or how to - use existing analyses. - -* `Accurate Garbage Collection with LLVM `_ - - The interfaces source-language compilers should use for compiling GC'd - programs. - -* `Source Level Debugging with LLVM `_ - - This document describes the design and philosophy behind the LLVM - source-level debugger. - -* :ref:`exception_handling` - - This document describes the design and implementation of exception handling - in LLVM. - -* :ref:`bugpoint` - - Automatic bug finder and test-case reducer description and usage - information. - -* :ref:`bitcode_format` - - This describes the file format and encoding used for LLVM "bc" files. - -* `System Library `_ - - This document describes the LLVM System Library (lib/System) and - how to keep LLVM source code portable - -* :ref:`lto` - - This document describes the interface between LLVM intermodular optimizer - and the linker and its design - -* :ref:`gold-plugin` - - How to build your programs with link-time optimization on Linux. - -* :ref:`debugging-jited-code` - - How to debug JITed code with GDB. - -* :ref:`branch_weight` - - Provides information about Branch Prediction Information. - -* :ref:`segmented_stacks` - - This document describes segmented stacks and how they are used in LLVM. - -* `Howto: Implementing LLVM Integrated Assembler`_ - - A simple guide for how to implement an LLVM integrated assembler for an - architecture. - -.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html - -* :ref:`marked_up_disassembly` - - This document describes the optional rich disassembly output syntax. - diff --git a/docs/tutorial/LangImpl1.html b/docs/tutorial/LangImpl1.html deleted file mode 100644 index 717454f392e5..000000000000 --- a/docs/tutorial/LangImpl1.html +++ /dev/null @@ -1,348 +0,0 @@ - - - - - Kaleidoscope: Tutorial Introduction and the Lexer - - - - - - - -

Kaleidoscope: Tutorial Introduction and the Lexer

- - - -
-

Written by Chris Lattner

-
- - -

Tutorial Introduction

- - -
- -

Welcome to the "Implementing a language with LLVM" tutorial. This tutorial -runs through the implementation of a simple language, showing how fun and -easy it can be. This tutorial will get you up and started as well as help to -build a framework you can extend to other languages. The code in this tutorial -can also be used as a playground to hack on other LLVM specific things. -

- -

-The goal of this tutorial is to progressively unveil our language, describing -how it is built up over time. This will let us cover a fairly broad range of -language design and LLVM-specific usage issues, showing and explaining the code -for it all along the way, without overwhelming you with tons of details up -front.

- -

It is useful to point out ahead of time that this tutorial is really about -teaching compiler techniques and LLVM specifically, not about teaching -modern and sane software engineering principles. In practice, this means that -we'll take a number of shortcuts to simplify the exposition. For example, the -code leaks memory, uses global variables all over the place, doesn't use nice -design patterns like visitors, etc... but it -is very simple. If you dig in and use the code as a basis for future projects, -fixing these deficiencies shouldn't be hard.

- -

I've tried to put this tutorial together in a way that makes chapters easy to -skip over if you are already familiar with or are uninterested in the various -pieces. The structure of the tutorial is: -

- -
    -
  • Chapter #1: Introduction to the Kaleidoscope -language, and the definition of its Lexer - This shows where we are going -and the basic functionality that we want it to do. In order to make this -tutorial maximally understandable and hackable, we choose to implement -everything in C++ instead of using lexer and parser generators. LLVM obviously -works just fine with such tools; feel free to use one if you prefer.
  • -
  • Chapter #2: Implementing a Parser and -AST - With the lexer in place, we can talk about parsing techniques and -basic AST construction. This tutorial describes recursive descent parsing and -operator precedence parsing. Nothing in Chapters 1 or 2 is LLVM-specific, -the code doesn't even link in LLVM at this point. :)
  • -
  • Chapter #3: Code generation to LLVM IR - -With the AST ready, we can show off how easy generation of LLVM IR really -is.
  • -
  • Chapter #4: Adding JIT and Optimizer -Support - Because a lot of people are interested in using LLVM as a JIT, -we'll dive right into it and show you the 3 lines it takes to add JIT support. -LLVM is also useful in many other ways, but this is one simple and "sexy" way -to show off its power. :)
  • -
  • Chapter #5: Extending the Language: Control -Flow - With the language up and running, we show how to extend it with -control flow operations (if/then/else and a 'for' loop). This gives us a chance -to talk about simple SSA construction and control flow.
  • -
  • Chapter #6: Extending the Language: -User-defined Operators - This is a silly but fun chapter that talks about -extending the language to let the user program define their own arbitrary -unary and binary operators (with assignable precedence!). This lets us build a -significant piece of the "language" as library routines.
  • -
  • Chapter #7: Extending the Language: Mutable -Variables - This chapter talks about adding user-defined local variables -along with an assignment operator. The interesting part about this is how -easy and trivial it is to construct SSA form in LLVM: no, LLVM does not -require your front-end to construct SSA form!
  • -
  • Chapter #8: Conclusion and other useful LLVM -tidbits - This chapter wraps up the series by talking about potential -ways to extend the language, but also includes a bunch of pointers to info about -"special topics" like adding garbage collection support, exceptions, debugging, -support for "spaghetti stacks", and a bunch of other tips and tricks.
  • - -
- -

By the end of the tutorial, we'll have written a bit less than 700 lines of -non-comment, non-blank, lines of code. With this small amount of code, we'll -have built up a very reasonable compiler for a non-trivial language including -a hand-written lexer, parser, AST, as well as code generation support with a JIT -compiler. While other systems may have interesting "hello world" tutorials, -I think the breadth of this tutorial is a great testament to the strengths of -LLVM and why you should consider it if you're interested in language or compiler -design.

- -

A note about this tutorial: we expect you to extend the language and play -with it on your own. Take the code and go crazy hacking away at it, compilers -don't need to be scary creatures - it can be a lot of fun to play with -languages!

- -
- - -

The Basic Language

- - -
- -

This tutorial will be illustrated with a toy language that we'll call -"Kaleidoscope" (derived -from "meaning beautiful, form, and view"). -Kaleidoscope is a procedural language that allows you to define functions, use -conditionals, math, etc. Over the course of the tutorial, we'll extend -Kaleidoscope to support the if/then/else construct, a for loop, user defined -operators, JIT compilation with a simple command line interface, etc.

- -

Because we want to keep things simple, the only datatype in Kaleidoscope is a -64-bit floating point type (aka 'double' in C parlance). As such, all values -are implicitly double precision and the language doesn't require type -declarations. This gives the language a very nice and simple syntax. For -example, the following simple example computes Fibonacci numbers:

- -
-
-# Compute the x'th fibonacci number.
-def fib(x)
-  if x < 3 then
-    1
-  else
-    fib(x-1)+fib(x-2)
-
-# This expression will compute the 40th number.
-fib(40)
-
-
- -

We also allow Kaleidoscope to call into standard library functions (the LLVM -JIT makes this completely trivial). This means that you can use the 'extern' -keyword to define a function before you use it (this is also useful for mutually -recursive functions). For example:

- -
-
-extern sin(arg);
-extern cos(arg);
-extern atan2(arg1 arg2);
-
-atan2(sin(.4), cos(42))
-
-
- -

A more interesting example is included in Chapter 6 where we write a little -Kaleidoscope application that displays -a Mandelbrot Set at various levels of magnification.

- -

Let's dive into the implementation of this language!

- -
- - -

The Lexer

- - -
- -

When it comes to implementing a language, the first thing needed is -the ability to process a text file and recognize what it says. The traditional -way to do this is to use a "lexer" (aka 'scanner') -to break the input up into "tokens". Each token returned by the lexer includes -a token code and potentially some metadata (e.g. the numeric value of a number). -First, we define the possibilities: -

- -
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5,
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-
- -

Each token returned by our lexer will either be one of the Token enum values -or it will be an 'unknown' character like '+', which is returned as its ASCII -value. If the current token is an identifier, the IdentifierStr -global variable holds the name of the identifier. If the current token is a -numeric literal (like 1.0), NumVal holds its value. Note that we use -global variables for simplicity, this is not the best choice for a real language -implementation :). -

- -

The actual implementation of the lexer is a single function named -gettok. The gettok function is called to return the next token -from standard input. Its definition starts as:

- -
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-
- -

-gettok works by calling the C getchar() function to read -characters one at a time from standard input. It eats them as it recognizes -them and stores the last character read, but not processed, in LastChar. The -first thing that it has to do is ignore whitespace between tokens. This is -accomplished with the loop above.

- -

The next thing gettok needs to do is recognize identifiers and -specific keywords like "def". Kaleidoscope does this with this simple loop:

- -
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-
-
- -

Note that this code sets the 'IdentifierStr' global whenever it -lexes an identifier. Also, since language keywords are matched by the same -loop, we handle them here inline. Numeric values are similar:

- -
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-
- -

This is all pretty straight-forward code for processing input. When reading -a numeric value from input, we use the C strtod function to convert it -to a numeric value that we store in NumVal. Note that this isn't doing -sufficient error checking: it will incorrectly read "1.23.45.67" and handle it as -if you typed in "1.23". Feel free to extend it :). Next we handle comments: -

- -
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-
-
- -

We handle comments by skipping to the end of the line and then return the -next token. Finally, if the input doesn't match one of the above cases, it is -either an operator character like '+' or the end of the file. These are handled -with this code:

- -
-
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-  
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-
- -

With this, we have the complete lexer for the basic Kaleidoscope language -(the full code listing for the Lexer is -available in the next chapter of the tutorial). -Next we'll build a simple parser that uses this to -build an Abstract Syntax Tree. When we have that, we'll include a driver -so that you can use the lexer and parser together. -

- -Next: Implementing a Parser and AST -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $ -
- - diff --git a/docs/tutorial/LangImpl1.rst b/docs/tutorial/LangImpl1.rst new file mode 100644 index 000000000000..aa619cf19f26 --- /dev/null +++ b/docs/tutorial/LangImpl1.rst @@ -0,0 +1,278 @@ +================================================= +Kaleidoscope: Tutorial Introduction and the Lexer +================================================= + +.. contents:: + :local: + +Tutorial Introduction +===================== + +Welcome to the "Implementing a language with LLVM" tutorial. This +tutorial runs through the implementation of a simple language, showing +how fun and easy it can be. This tutorial will get you up and started as +well as help to build a framework you can extend to other languages. The +code in this tutorial can also be used as a playground to hack on other +LLVM specific things. + +The goal of this tutorial is to progressively unveil our language, +describing how it is built up over time. This will let us cover a fairly +broad range of language design and LLVM-specific usage issues, showing +and explaining the code for it all along the way, without overwhelming +you with tons of details up front. + +It is useful to point out ahead of time that this tutorial is really +about teaching compiler techniques and LLVM specifically, *not* about +teaching modern and sane software engineering principles. In practice, +this means that we'll take a number of shortcuts to simplify the +exposition. For example, the code leaks memory, uses global variables +all over the place, doesn't use nice design patterns like +`visitors `_, etc... but +it is very simple. If you dig in and use the code as a basis for future +projects, fixing these deficiencies shouldn't be hard. + +I've tried to put this tutorial together in a way that makes chapters +easy to skip over if you are already familiar with or are uninterested +in the various pieces. 
The structure of the tutorial is: + +- `Chapter #1 <#language>`_: Introduction to the Kaleidoscope + language, and the definition of its Lexer - This shows where we are + going and the basic functionality that we want it to do. In order to + make this tutorial maximally understandable and hackable, we choose + to implement everything in C++ instead of using lexer and parser + generators. LLVM obviously works just fine with such tools, feel free + to use one if you prefer. +- `Chapter #2 `_: Implementing a Parser and AST - + With the lexer in place, we can talk about parsing techniques and + basic AST construction. This tutorial describes recursive descent + parsing and operator precedence parsing. Nothing in Chapters 1 or 2 + is LLVM-specific, the code doesn't even link in LLVM at this point. + :) +- `Chapter #3 `_: Code generation to LLVM IR - With + the AST ready, we can show off how easy generation of LLVM IR really + is. +- `Chapter #4 `_: Adding JIT and Optimizer Support + - Because a lot of people are interested in using LLVM as a JIT, + we'll dive right into it and show you the 3 lines it takes to add JIT + support. LLVM is also useful in many other ways, but this is one + simple and "sexy" way to shows off its power. :) +- `Chapter #5 `_: Extending the Language: Control + Flow - With the language up and running, we show how to extend it + with control flow operations (if/then/else and a 'for' loop). This + gives us a chance to talk about simple SSA construction and control + flow. +- `Chapter #6 `_: Extending the Language: + User-defined Operators - This is a silly but fun chapter that talks + about extending the language to let the user program define their own + arbitrary unary and binary operators (with assignable precedence!). + This lets us build a significant piece of the "language" as library + routines. 
+- `Chapter #7 `_: Extending the Language: Mutable + Variables - This chapter talks about adding user-defined local + variables along with an assignment operator. The interesting part + about this is how easy and trivial it is to construct SSA form in + LLVM: no, LLVM does *not* require your front-end to construct SSA + form! +- `Chapter #8 `_: Conclusion and other useful LLVM + tidbits - This chapter wraps up the series by talking about + potential ways to extend the language, but also includes a bunch of + pointers to info about "special topics" like adding garbage + collection support, exceptions, debugging, support for "spaghetti + stacks", and a bunch of other tips and tricks. + +By the end of the tutorial, we'll have written a bit less than 700 lines +of non-comment, non-blank, lines of code. With this small amount of +code, we'll have built up a very reasonable compiler for a non-trivial +language including a hand-written lexer, parser, AST, as well as code +generation support with a JIT compiler. While other systems may have +interesting "hello world" tutorials, I think the breadth of this +tutorial is a great testament to the strengths of LLVM and why you +should consider it if you're interested in language or compiler design. + +A note about this tutorial: we expect you to extend the language and +play with it on your own. Take the code and go crazy hacking away at it, +compilers don't need to be scary creatures - it can be a lot of fun to +play with languages! + +The Basic Language +================== + +This tutorial will be illustrated with a toy language that we'll call +"`Kaleidoscope `_" (derived +from "meaning beautiful, form, and view"). Kaleidoscope is a procedural +language that allows you to define functions, use conditionals, math, +etc. Over the course of the tutorial, we'll extend Kaleidoscope to +support the if/then/else construct, a for loop, user defined operators, +JIT compilation with a simple command line interface, etc. 
+ +Because we want to keep things simple, the only datatype in Kaleidoscope +is a 64-bit floating point type (aka 'double' in C parlance). As such, +all values are implicitly double precision and the language doesn't +require type declarations. This gives the language a very nice and +simple syntax. For example, the following simple example computes +`Fibonacci numbers: `_ + +:: + + # Compute the x'th fibonacci number. + def fib(x) + if x < 3 then + 1 + else + fib(x-1)+fib(x-2) + + # This expression will compute the 40th number. + fib(40) + +We also allow Kaleidoscope to call into standard library functions (the +LLVM JIT makes this completely trivial). This means that you can use the +'extern' keyword to define a function before you use it (this is also +useful for mutually recursive functions). For example: + +:: + + extern sin(arg); + extern cos(arg); + extern atan2(arg1 arg2); + + atan2(sin(.4), cos(42)) + +A more interesting example is included in Chapter 6 where we write a +little Kaleidoscope application that `displays a Mandelbrot +Set `_ at various levels of magnification. + +Lets dive into the implementation of this language! + +The Lexer +========= + +When it comes to implementing a language, the first thing needed is the +ability to process a text file and recognize what it says. The +traditional way to do this is to use a +"`lexer `_" (aka +'scanner') to break the input up into "tokens". Each token returned by +the lexer includes a token code and potentially some metadata (e.g. the +numeric value of a number). First, we define the possibilities: + +.. code-block:: c++ + + // The lexer returns tokens [0-255] if it is an unknown character, otherwise one + // of these for known things. 
+ enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + }; + + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number + +Each token returned by our lexer will either be one of the Token enum +values or it will be an 'unknown' character like '+', which is returned +as its ASCII value. If the current token is an identifier, the +``IdentifierStr`` global variable holds the name of the identifier. If +the current token is a numeric literal (like 1.0), ``NumVal`` holds its +value. Note that we use global variables for simplicity, this is not the +best choice for a real language implementation :). + +The actual implementation of the lexer is a single function named +``gettok``. The ``gettok`` function is called to return the next token +from standard input. Its definition starts as: + +.. code-block:: c++ + + /// gettok - Return the next token from standard input. + static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + +``gettok`` works by calling the C ``getchar()`` function to read +characters one at a time from standard input. It eats them as it +recognizes them and stores the last character read, but not processed, +in LastChar. The first thing that it has to do is ignore whitespace +between tokens. This is accomplished with the loop above. + +The next thing ``gettok`` needs to do is recognize identifiers and +specific keywords like "def". Kaleidoscope does this with this simple +loop: + +.. 
code-block:: c++ + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + +Note that this code sets the '``IdentifierStr``' global whenever it +lexes an identifier. Also, since language keywords are matched by the +same loop, we handle them here inline. Numeric values are similar: + +.. code-block:: c++ + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + +This is all pretty straight-forward code for processing input. When +reading a numeric value from input, we use the C ``strtod`` function to +convert it to a numeric value that we store in ``NumVal``. Note that +this isn't doing sufficient error checking: it will incorrectly read +"1.23.45.67" and handle it as if you typed in "1.23". Feel free to +extend it :). Next we handle comments: + +.. code-block:: c++ + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + +We handle comments by skipping to the end of the line and then return +the next token. Finally, if the input doesn't match one of the above +cases, it is either an operator character like '+' or the end of the +file. These are handled with this code: + +.. code-block:: c++ + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. 
+ int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; + } + +With this, we have the complete lexer for the basic Kaleidoscope +language (the `full code listing `_ for the Lexer +is available in the `next chapter `_ of the tutorial). +Next we'll `build a simple parser that uses this to build an Abstract +Syntax Tree `_. When we have that, we'll include a +driver so that you can use the lexer and parser together. + +`Next: Implementing a Parser and AST `_ + diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html deleted file mode 100644 index 694f7342d38b..000000000000 --- a/docs/tutorial/LangImpl2.html +++ /dev/null @@ -1,1231 +0,0 @@ - - - - - Kaleidoscope: Implementing a Parser and AST - - - - - - - -

Kaleidoscope: Implementing a Parser and AST

- - - -
-

Written by Chris Lattner

-
- - -

Chapter 2 Introduction

- - -
- -

Welcome to Chapter 2 of the "Implementing a language -with LLVM" tutorial. This chapter shows you how to use the lexer, built in -Chapter 1, to build a full parser for -our Kaleidoscope language. Once we have a parser, we'll define and build an Abstract Syntax -Tree (AST).

- -

The parser we will build uses a combination of Recursive Descent -Parsing and Operator-Precedence -Parsing to parse the Kaleidoscope language (the latter for -binary expressions and the former for everything else). Before we get to -parsing though, let's talk about the output of the parser: the Abstract Syntax -Tree.

- -
- - -

The Abstract Syntax Tree (AST)

- - -
- -

The AST for a program captures its behavior in such a way that it is easy for -later stages of the compiler (e.g. code generation) to interpret. We basically -want one object for each construct in the language, and the AST should closely -model the language. In Kaleidoscope, we have expressions, a prototype, and a -function object. We'll start with expressions first:

- -
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-};
-
-
- -

The code above shows the definition of the base ExprAST class and one -subclass which we use for numeric literals. The important thing to note about -this code is that the NumberExprAST class captures the numeric value of the -literal as an instance variable. This allows later phases of the compiler to -know what the stored numeric value is.

- -

Right now we only create the AST, so there are no useful accessor methods on -them. It would be very easy to add a virtual method to pretty print the code, -for example. Here are the other expression AST node definitions that we'll use -in the basic form of the Kaleidoscope language: -

- -
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector<ExprAST*> Args;
-public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
-    : Callee(callee), Args(args) {}
-};
-
-
- -

This is all (intentionally) rather straight-forward: variables capture the -variable name, binary operators capture their opcode (e.g. '+'), and calls -capture a function name as well as a list of any argument expressions. One thing -that is nice about our AST is that it captures the language features without -talking about the syntax of the language. Note that there is no discussion about -precedence of binary operators, lexical structure, etc.

- -

For our basic language, these are all of the expression nodes we'll define. -Because it doesn't have conditional control flow, it isn't Turing-complete; -we'll fix that in a later installment. The two things we need next are a way -to talk about the interface to a function, and a way to talk about functions -themselves:

- -
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
-    : Name(name), Args(args) {}
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-};
-
-
- -

In Kaleidoscope, functions are typed with just a count of their arguments. -Since all values are double precision floating point, the type of each argument -doesn't need to be stored anywhere. In a more aggressive and realistic -language, the "ExprAST" class would probably have a type field.

- -

With this scaffolding, we can now talk about parsing expressions and function -bodies in Kaleidoscope.

- -
- - -

Parser Basics

- - -
- -

Now that we have an AST to build, we need to define the parser code to build -it. The idea here is that we want to parse something like "x+y" (which is -returned as three tokens by the lexer) into an AST that could be generated with -calls like this:

- -
-
-  ExprAST *X = new VariableExprAST("x");
-  ExprAST *Y = new VariableExprAST("y");
-  ExprAST *Result = new BinaryExprAST('+', X, Y);
-
-
- -

In order to do this, we'll start by defining some basic helper routines:

- -
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-
- -

-This implements a simple token buffer around the lexer. This allows -us to look one token ahead at what the lexer is returning. Every function in -our parser will assume that CurTok is the current token that needs to be -parsed.

- -
-
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-
- -

-The Error routines are simple helper routines that our parser will use -to handle errors. The error recovery in our parser will not be the best and -is not particularly user-friendly, but it will be enough for our tutorial. These -routines make it easier to handle errors in routines that have various return -types: they always return null.

- -

With these basic helper functions, we can implement the first -piece of our grammar: numeric literals.

- -
- - -

Basic Expression Parsing

- - -
- -

We start with numeric literals, because they are the simplest to process. -For each production in our grammar, we'll define a function which parses that -production. For numeric literals, we have: -

- -
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-
- -

This routine is very simple: it expects to be called when the current token -is a tok_number token. It takes the current number value, creates -a NumberExprAST node, advances the lexer to the next token, and finally -returns.

- -

There are some interesting aspects to this. The most important one is that -this routine eats all of the tokens that correspond to the production and -returns the lexer buffer with the next token (which is not part of the grammar -production) ready to go. This is a fairly standard way to go for recursive -descent parsers. For a better example, the parenthesis operator is defined like -this:

- -
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-
- -

This function illustrates a number of interesting things about the -parser:

- -

-1) It shows how we use the Error routines. When called, this function expects -that the current token is a '(' token, but after parsing the subexpression, it -is possible that there is no ')' waiting. For example, if the user types in -"(4 x" instead of "(4)", the parser should emit an error. Because errors can -occur, the parser needs a way to indicate that they happened: in our parser, we -return null on an error.

- -

2) Another interesting aspect of this function is that it uses recursion by -calling ParseExpression (we will soon see that ParseExpression can call -ParseParenExpr). This is powerful because it allows us to handle -recursive grammars, and keeps each production very simple. Note that -parentheses do not cause construction of AST nodes themselves. While we could -do it this way, the most important role of parentheses is to guide the parser -and provide grouping. Once the parser constructs the AST, parentheses are not -needed.

- -

The next simple production is for handling variable references and function -calls:

- -
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector<ExprAST*> Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-
- -

This routine follows the same style as the other routines. (It expects to be -called if the current token is a tok_identifier token). It also has -recursion and error handling. One interesting aspect of this is that it uses -look-ahead to determine if the current identifier is a stand alone -variable reference or if it is a function call expression. It handles this by -checking to see if the token after the identifier is a '(' token, constructing -either a VariableExprAST or CallExprAST node as appropriate. -

- -

Now that we have all of our simple expression-parsing logic in place, we can -define a helper function to wrap it together into one entry point. We call this -class of expressions "primary" expressions, for reasons that will become more -clear later in the tutorial. In order to -parse an arbitrary primary expression, we need to determine what sort of -expression it is:

- -
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-
-
- -

Now that you see the definition of this function, it is more obvious why we -can assume the state of CurTok in the various functions. This uses look-ahead -to determine which sort of expression is being inspected, and then parses it -with a function call.

- -

Now that basic expressions are handled, we need to handle binary expressions. -They are a bit more complex.

- -
- - -

Binary Expression Parsing

- - -
- -

Binary expressions are significantly harder to parse because they are often -ambiguous. For example, when given the string "x+y*z", the parser can choose -to parse it as either "(x+y)*z" or "x+(y*z)". With common definitions from -mathematics, we expect the latter parse, because "*" (multiplication) has -higher precedence than "+" (addition).

- -

There are many ways to handle this, but an elegant and efficient way is to -use Operator-Precedence -Parsing. This parsing technique uses the precedence of binary operators to -guide recursion. To start with, we need a table of precedences:

- -
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map<char, int> BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-    
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec <= 0) return -1;
-  return TokPrec;
-}
-
-int main() {
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['<'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-  ...
-}
-
-
- -

For the basic form of Kaleidoscope, we will only support 4 binary operators -(this can obviously be extended by you, our brave and intrepid reader). The -GetTokPrecedence function returns the precedence for the current token, -or -1 if the token is not a binary operator. Having a map makes it easy to add -new operators and makes it clear that the algorithm doesn't depend on the -specific operators involved, but it would be easy enough to eliminate the map -and do the comparisons in the GetTokPrecedence function. (Or just use -a fixed-size array).

- -

With the helper above defined, we can now start parsing binary expressions. -The basic idea of operator precedence parsing is to break down an expression -with potentially ambiguous binary operators into pieces. Consider, for example, -the expression "a+b+(c+d)*e*f+g". Operator precedence parsing considers this -as a stream of primary expressions separated by binary operators. As such, -it will first parse the leading primary expression "a", then it will see the -pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g]. Note that because parentheses -are primary expressions, the binary expression parser doesn't need to worry -about nested subexpressions like (c+d) at all. -

- -

-To start, an expression is a primary expression potentially followed by a -sequence of [binop,primaryexpr] pairs:

- -
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-
- -

ParseBinOpRHS is the function that parses the sequence of pairs for -us. It takes a precedence and a pointer to an expression for the part that has been -parsed so far. Note that "x" is a perfectly valid expression: As such, "binoprhs" is -allowed to be empty, in which case it returns the expression that is passed into -it. In our example above, the code passes the expression for "a" into -ParseBinOpRHS and the current token is "+".

- -

The precedence value passed into ParseBinOpRHS indicates the -minimal operator precedence that the function is allowed to eat. For -example, if the current pair stream is [+, x] and ParseBinOpRHS is -passed in a precedence of 40, it will not consume any tokens (because the -precedence of '+' is only 20). With this in mind, ParseBinOpRHS starts -with:

- -
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec < ExprPrec)
-      return LHS;
-
-
- -

This code gets the precedence of the current token and checks to see if it is -too low. Because we defined invalid tokens to have a precedence of -1, this -check implicitly knows that the pair-stream ends when the token stream runs out -of binary operators. If this check succeeds, we know that the token is a binary -operator and that it will be included in this expression:

- -
-
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-
-
- -

As such, this code eats (and remembers) the binary operator and then parses -the primary expression that follows. This builds up the whole pair, the first of -which is [+, b] for the running example.

- -

Now that we parsed the left-hand side of an expression and one pair of the -RHS sequence, we have to decide which way the expression associates. In -particular, we could have "(a+b) binop unparsed" or "a + (b binop unparsed)". -To determine this, we look ahead at "binop" to determine its precedence and -compare it to BinOp's precedence (which is '+' in this case):

- -
-
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec < NextPrec) {
-
-
- -

If the precedence of the binop to the right of "RHS" is lower or equal to the -precedence of our current operator, then we know that the parentheses associate -as "(a+b) binop ...". In our example, the current operator is "+" and the next -operator is "+", we know that they have the same precedence. In this case we'll -create the AST node for "a+b", and then continue parsing:

- -
-
-      ... if body omitted ...
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }  // loop around to the top of the while loop.
-}
-
-
- -

In our example above, this will turn "a+b+" into "(a+b)" and execute the next -iteration of the loop, with "+" as the current token. The code above will eat, -remember, and parse "(c+d)" as the primary expression, which makes the -current pair equal to [+, (c+d)]. It will then evaluate the 'if' conditional above with -"*" as the binop to the right of the primary. In this case, the precedence of "*" is -higher than the precedence of "+" so the if condition will be entered.

- -

The critical question left here is "how can the if condition parse the right -hand side in full"? In particular, to build the AST correctly for our example, -it needs to get all of "(c+d)*e*f" as the RHS expression variable. The code to -do this is surprisingly simple (code from the above two blocks duplicated for -context):

- -
-
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }  // loop around to the top of the while loop.
-}
-
-
- -

At this point, we know that the binary operator to the RHS of our primary -has higher precedence than the binop we are currently parsing. As such, we know -that any sequence of pairs whose operators are all higher precedence than "+" -should be parsed together and returned as "RHS". To do this, we recursively -invoke the ParseBinOpRHS function specifying "TokPrec+1" as the minimum -precedence required for it to continue. In our example above, this will cause -it to return the AST node for "(c+d)*e*f" as RHS, which is then set as the RHS -of the '+' expression.

- -

Finally, on the next iteration of the while loop, the "+g" piece is parsed -and added to the AST. With this little bit of code (14 non-trivial lines), we -correctly handle fully general binary expression parsing in a very elegant way. -This was a whirlwind tour of this code, and it is somewhat subtle. I recommend -running through it with a few tough examples to see how it works. -

- -

This wraps up handling of expressions. At this point, we can point the -parser at an arbitrary token stream and build an expression from it, stopping -at the first token that is not part of the expression. Next up we need to -handle function definitions, etc.

- -
- - -

Parsing the Rest

- - -
- -

-The next thing missing is handling of function prototypes. In Kaleidoscope, -these are used both for 'extern' function declarations as well as function body -definitions. The code to do this is straight-forward and not very interesting -(once you've survived expressions): -

- -
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  // Read the list of argument names.
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-
- -

Given this, a function definition is very simple, just a prototype plus -an expression to implement the body:

- -
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-
- -

In addition, we support 'extern' to declare functions like 'sin' and 'cos' as -well as to support forward declaration of user functions. These 'extern's are just -prototypes with no body:

- -
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-
- -

Finally, we'll also let the user type in arbitrary top-level expressions and -evaluate them on the fly. We will handle this by defining anonymous nullary -(zero argument) functions for them:

- -
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-
- -

Now that we have all the pieces, let's build a little driver that will let us -actually execute this code we've built!

- -
- - -

The Driver

- - -
- -

The driver for this simply invokes all of the parsing pieces with a top-level -dispatch loop. There isn't much interesting here, so I'll just include the -top-level loop. See below for full code in the "Top-Level -Parsing" section.

- -
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready> ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-
- -

The most interesting part of this is that we ignore top-level semicolons. -Why is this, you ask? The basic reason is that if you type "4 + 5" at the -command line, the parser doesn't know whether that is the end of what you will type -or not. For example, on the next line you could type "def foo..." in which case -4+5 is the end of a top-level expression. Alternatively you could type "* 6", -which would continue the expression. Having top-level semicolons allows you to -type "4+5;", and the parser will know you are done.

- -
- - -

Conclusions

- - -
- -

With just under 400 lines of commented code (240 lines of non-comment, -non-blank code), we fully defined our minimal language, including a lexer, -parser, and AST builder. With this done, the executable will validate -Kaleidoscope code and tell us if it is grammatically invalid. For -example, here is a sample interaction:

- -
-
-$ ./a.out
-ready> def foo(x y) x+foo(y, 4.0);
-Parsed a function definition.
-ready> def foo(x y) x+y y;
-Parsed a function definition.
-Parsed a top-level expr
-ready> def foo(x y) x+y );
-Parsed a function definition.
-Error: unknown token when expecting an expression
-ready> extern sin(a);
-ready> Parsed an extern
-ready> ^D
-$ 
-
-
- -

There is a lot of room for extension here. You can define new AST nodes, -extend the language in many ways, etc. In the next -installment, we will describe how to generate LLVM Intermediate -Representation (IR) from the AST.

- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for this and the previous chapter. -Note that it is fully self-contained: you don't need LLVM or any external -libraries at all for this. (Besides the C and C++ standard libraries, of -course.) To build this, just compile with:

- -
-
-# Compile
-clang++ -g -O3 toy.cpp
-# Run
-./a.out 
-
-
- -

Here is the code:

- -
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include <map>
-#include <vector>
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector<ExprAST*> Args;
-public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
-    : Callee(callee), Args(args) {}
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
-    : Name(name), Args(args) {}
-  
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map<char, int> BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec <= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector<ExprAST*> Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec < ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing
-//===----------------------------------------------------------------------===//
-
-static void HandleDefinition() {
-  if (ParseDefinition()) {
-    fprintf(stderr, "Parsed a function definition.\n");
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (ParseExtern()) {
-    fprintf(stderr, "Parsed an extern\n");
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (ParseTopLevelExpr()) {
-    fprintf(stderr, "Parsed a top-level expr\n");
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready> ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['<'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready> ");
-  getNextToken();
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  return 0;
-}
-
-
-Next: Implementing Code Generation to LLVM IR -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $ -
- - diff --git a/docs/tutorial/LangImpl2.rst b/docs/tutorial/LangImpl2.rst new file mode 100644 index 000000000000..7262afa8f374 --- /dev/null +++ b/docs/tutorial/LangImpl2.rst @@ -0,0 +1,1096 @@ +=========================================== +Kaleidoscope: Implementing a Parser and AST +=========================================== + +.. contents:: + :local: + +Chapter 2 Introduction +====================== + +Welcome to Chapter 2 of the "`Implementing a language with +LLVM `_" tutorial. This chapter shows you how to use the +lexer, built in `Chapter 1 `_, to build a full +`parser `_ for our Kaleidoscope +language. Once we have a parser, we'll define and build an `Abstract +Syntax Tree `_ (AST). + +The parser we will build uses a combination of `Recursive Descent +Parsing `_ and +`Operator-Precedence +Parsing `_ to +parse the Kaleidoscope language (the latter for binary expressions and +the former for everything else). Before we get to parsing though, lets +talk about the output of the parser: the Abstract Syntax Tree. + +The Abstract Syntax Tree (AST) +============================== + +The AST for a program captures its behavior in such a way that it is +easy for later stages of the compiler (e.g. code generation) to +interpret. We basically want one object for each construct in the +language, and the AST should closely model the language. In +Kaleidoscope, we have expressions, a prototype, and a function object. +We'll start with expressions first: + +.. code-block:: c++ + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". + class NumberExprAST : public ExprAST { + double Val; + public: + NumberExprAST(double val) : Val(val) {} + }; + +The code above shows the definition of the base ExprAST class and one +subclass which we use for numeric literals. 
The important thing to note +about this code is that the NumberExprAST class captures the numeric +value of the literal as an instance variable. This allows later phases +of the compiler to know what the stored numeric value is. + +Right now we only create the AST, so there are no useful accessor +methods on them. It would be very easy to add a virtual method to pretty +print the code, for example. Here are the other expression AST node +definitions that we'll use in the basic form of the Kaleidoscope +language: + +.. code-block:: c++ + + /// VariableExprAST - Expression class for referencing a variable, like "a". + class VariableExprAST : public ExprAST { + std::string Name; + public: + VariableExprAST(const std::string &name) : Name(name) {} + }; + + /// BinaryExprAST - Expression class for a binary operator. + class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; + public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + }; + + /// CallExprAST - Expression class for function calls. + class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; + public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + }; + +This is all (intentionally) rather straight-forward: variables capture +the variable name, binary operators capture their opcode (e.g. '+'), and +calls capture a function name as well as a list of any argument +expressions. One thing that is nice about our AST is that it captures +the language features without talking about the syntax of the language. +Note that there is no discussion about precedence of binary operators, +lexical structure, etc. + +For our basic language, these are all of the expression nodes we'll +define. Because it doesn't have conditional control flow, it isn't +Turing-complete; we'll fix that in a later installment.
The two things +we need next are a way to talk about the interface to a function, and a +way to talk about functions themselves: + +.. code-block:: c++ + + /// PrototypeAST - This class represents the "prototype" for a function, + /// which captures its name, and its argument names (thus implicitly the number + /// of arguments the function takes). + class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + }; + + /// FunctionAST - This class represents a function definition itself. + class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; + public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + }; + +In Kaleidoscope, functions are typed with just a count of their +arguments. Since all values are double precision floating point, the +type of each argument doesn't need to be stored anywhere. In a more +aggressive and realistic language, the "ExprAST" class would probably +have a type field. + +With this scaffolding, we can now talk about parsing expressions and +function bodies in Kaleidoscope. + +Parser Basics +============= + +Now that we have an AST to build, we need to define the parser code to +build it. The idea here is that we want to parse something like "x+y" +(which is returned as three tokens by the lexer) into an AST that could +be generated with calls like this: + +.. code-block:: c++ + + ExprAST *X = new VariableExprAST("x"); + ExprAST *Y = new VariableExprAST("y"); + ExprAST *Result = new BinaryExprAST('+', X, Y); + +In order to do this, we'll start by defining some basic helper routines: + +.. code-block:: c++ + + /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current + /// token the parser is looking at. getNextToken reads another token from the + /// lexer and updates CurTok with its results.
+ static int CurTok; + static int getNextToken() { + return CurTok = gettok(); + } + +This implements a simple token buffer around the lexer. This allows us +to look one token ahead at what the lexer is returning. Every function +in our parser will assume that CurTok is the current token that needs to +be parsed. + +.. code-block:: c++ + + + /// Error* - These are little helper functions for error handling. + ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} + PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } + FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +The ``Error`` routines are simple helper routines that our parser will +use to handle errors. The error recovery in our parser will not be the +best and is not particularly user-friendly, but it will be enough for our +tutorial. These routines make it easier to handle errors in routines +that have various return types: they always return null. + +With these basic helper functions, we can implement the first piece of +our grammar: numeric literals. + +Basic Expression Parsing +======================== + +We start with numeric literals, because they are the simplest to +process. For each production in our grammar, we'll define a function +which parses that production. For numeric literals, we have: + +.. code-block:: c++ + + /// numberexpr ::= number + static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; + } + +This routine is very simple: it expects to be called when the current +token is a ``tok_number`` token. It takes the current number value, +creates a ``NumberExprAST`` node, advances the lexer to the next token, +and finally returns. + +There are some interesting aspects to this.
The most important one is +that this routine eats all of the tokens that correspond to the +production and returns the lexer buffer with the next token (which is +not part of the grammar production) ready to go. This is a fairly +standard way to go for recursive descent parsers. For a better example, +the parenthesis operator is defined like this: + +.. code-block:: c++ + + /// parenexpr ::= '(' expression ')' + static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; + } + +This function illustrates a number of interesting things about the +parser: + +1) It shows how we use the Error routines. When called, this function +expects that the current token is a '(' token, but after parsing the +subexpression, it is possible that there is no ')' waiting. For example, +if the user types in "(4 x" instead of "(4)", the parser should emit an +error. Because errors can occur, the parser needs a way to indicate that +they happened: in our parser, we return null on an error. + +2) Another interesting aspect of this function is that it uses recursion +by calling ``ParseExpression`` (we will soon see that +``ParseExpression`` can call ``ParseParenExpr``). This is powerful +because it allows us to handle recursive grammars, and keeps each +production very simple. Note that parentheses do not cause construction +of AST nodes themselves. While we could do it this way, the most +important role of parentheses is to guide the parser and provide +grouping. Once the parser constructs the AST, parentheses are not +needed. + +The next simple production is for handling variable references and +function calls: + +.. code-block:: c++ + + /// identifierexpr + /// ::= identifier + /// ::= identifier '(' expression* ')' + static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier.
+ + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); + } + +This routine follows the same style as the other routines. (It expects +to be called if the current token is a ``tok_identifier`` token). It +also has recursion and error handling. One interesting aspect of this is +that it uses *look-ahead* to determine if the current identifier is a +stand alone variable reference or if it is a function call expression. +It handles this by checking to see if the token after the identifier is +a '(' token, constructing either a ``VariableExprAST`` or +``CallExprAST`` node as appropriate. + +Now that we have all of our simple expression-parsing logic in place, we +can define a helper function to wrap it together into one entry point. +We call this class of expressions "primary" expressions, for reasons +that will become more clear `later in the +tutorial `_. In order to parse an arbitrary +primary expression, we need to determine what sort of expression it is: + +.. code-block:: c++ + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } + } + +Now that you see the definition of this function, it is more obvious why +we can assume the state of CurTok in the various functions.
This uses +look-ahead to determine which sort of expression is being inspected, and +then parses it with a function call. + +Now that basic expressions are handled, we need to handle binary +expressions. They are a bit more complex. + +Binary Expression Parsing +========================= + +Binary expressions are significantly harder to parse because they are +often ambiguous. For example, when given the string "x+y\*z", the parser +can choose to parse it as either "(x+y)\*z" or "x+(y\*z)". With common +definitions from mathematics, we expect the latter parse, because "\*" +(multiplication) has higher *precedence* than "+" (addition). + +There are many ways to handle this, but an elegant and efficient way is +to use `Operator-Precedence +Parsing `_. +This parsing technique uses the precedence of binary operators to guide +recursion. To start with, we need a table of precedences: + +.. code-block:: c++ + + /// BinopPrecedence - This holds the precedence for each binary operator that is + /// defined. + static std::map<char, int> BinopPrecedence; + + /// GetTokPrecedence - Get the precedence of the pending binary operator token. + static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; + } + + int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + ... + } + +For the basic form of Kaleidoscope, we will only support 4 binary +operators (this can obviously be extended by you, our brave and intrepid +reader). The ``GetTokPrecedence`` function returns the precedence for +the current token, or -1 if the token is not a binary operator.
Having a +map makes it easy to add new operators and makes it clear that the +algorithm doesn't depend on the specific operators involved, but it +would be easy enough to eliminate the map and do the comparisons in the +``GetTokPrecedence`` function. (Or just use a fixed-size array). + +With the helper above defined, we can now start parsing binary +expressions. The basic idea of operator precedence parsing is to break +down an expression with potentially ambiguous binary operators into +pieces. Consider, for example, the expression "a+b+(c+d)\*e\*f+g". +Operator precedence parsing considers this as a stream of primary +expressions separated by binary operators. As such, it will first parse +the leading primary expression "a", then it will see the pairs [+, b] +[+, (c+d)] [\*, e] [\*, f] and [+, g]. Note that because parentheses are +primary expressions, the binary expression parser doesn't need to worry +about nested subexpressions like (c+d) at all. + +To start, an expression is a primary expression potentially followed by +a sequence of [binop,primaryexpr] pairs: + +.. code-block:: c++ + + /// expression + /// ::= primary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + +``ParseBinOpRHS`` is the function that parses the sequence of pairs for +us. It takes a precedence and a pointer to an expression for the part +that has been parsed so far. Note that "x" is a perfectly valid +expression: As such, "binoprhs" is allowed to be empty, in which case it +returns the expression that is passed into it. In our example above, the +code passes the expression for "a" into ``ParseBinOpRHS`` and the +current token is "+". + +The precedence value passed into ``ParseBinOpRHS`` indicates the +*minimal operator precedence* that the function is allowed to eat.
For +example, if the current pair stream is [+, x] and ``ParseBinOpRHS`` is +passed in a precedence of 40, it will not consume any tokens (because +the precedence of '+' is only 20). With this in mind, ``ParseBinOpRHS`` +starts with: + +.. code-block:: c++ + + /// binoprhs + /// ::= ('+' primary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + +This code gets the precedence of the current token and checks to see if +it is too low. Because we defined invalid tokens to have a precedence of +-1, this check implicitly knows that the pair-stream ends when the token +stream runs out of binary operators. If this check succeeds, we know +that the token is a binary operator and that it will be included in this +expression: + +.. code-block:: c++ + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + +As such, this code eats (and remembers) the binary operator and then +parses the primary expression that follows. This builds up the whole +pair, the first of which is [+, b] for the running example. + +Now that we parsed the left-hand side of an expression and one pair of +the RHS sequence, we have to decide which way the expression associates. +In particular, we could have "(a+b) binop unparsed" or "a + (b binop +unparsed)". To determine this, we look ahead at "binop" to determine its +precedence and compare it to BinOp's precedence (which is '+' in this +case): + +.. code-block:: c++ + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + +If the precedence of the binop to the right of "RHS" is lower or equal +to the precedence of our current operator, then we know that the +parentheses associate as "(a+b) binop ...". In our example, the current +operator is "+" and the next operator is "+", we know that they have the +same precedence. In this case we'll create the AST node for "a+b", and +then continue parsing: + +.. code-block:: c++ + + ... if body omitted ... + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } // loop around to the top of the while loop. + } + +In our example above, this will turn "a+b+" into "(a+b)" and execute the +next iteration of the loop, with "+" as the current token. The code +above will eat, remember, and parse "(c+d)" as the primary expression, +which makes the current pair equal to [+, (c+d)]. It will then evaluate +the 'if' conditional above with "\*" as the binop to the right of the +primary. In this case, the precedence of "\*" is higher than the +precedence of "+" so the if condition will be entered. + +The critical question left here is "how can the if condition parse the +right hand side in full"? In particular, to build the AST correctly for +our example, it needs to get all of "(c+d)\*e\*f" as the RHS expression +variable. The code to do this is surprisingly simple (code from the +above two blocks duplicated for context): + +.. code-block:: c++ + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } // loop around to the top of the while loop. + } + +At this point, we know that the binary operator to the RHS of our +primary has higher precedence than the binop we are currently parsing. 
+As such, we know that any sequence of pairs whose operators are all +higher precedence than "+" should be parsed together and returned as +"RHS". To do this, we recursively invoke the ``ParseBinOpRHS`` function +specifying "TokPrec+1" as the minimum precedence required for it to +continue. In our example above, this will cause it to return the AST +node for "(c+d)\*e\*f" as RHS, which is then set as the RHS of the '+' +expression. + +Finally, on the next iteration of the while loop, the "+g" piece is +parsed and added to the AST. With this little bit of code (14 +non-trivial lines), we correctly handle fully general binary expression +parsing in a very elegant way. This was a whirlwind tour of this code, +and it is somewhat subtle. I recommend running through it with a few +tough examples to see how it works. + +This wraps up handling of expressions. At this point, we can point the +parser at an arbitrary token stream and build an expression from it, +stopping at the first token that is not part of the expression. Next up +we need to handle function definitions, etc. + +Parsing the Rest +================ + +The next thing missing is handling of function prototypes. In +Kaleidoscope, these are used both for 'extern' function declarations as +well as function body definitions. The code to do this is +straight-forward and not very interesting (once you've survived +expressions): + +.. code-block:: c++ + + /// prototype + /// ::= id '(' id* ')' + static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + // Read the list of argument names. + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. 
+ + return new PrototypeAST(FnName, ArgNames); + } + +Given this, a function definition is very simple, just a prototype plus +an expression to implement the body: + +.. code-block:: c++ + + /// definition ::= 'def' prototype expression + static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; + } + +In addition, we support 'extern' to declare functions like 'sin' and +'cos' as well as to support forward declaration of user functions. These +'extern's are just prototypes with no body: + +.. code-block:: c++ + + /// external ::= 'extern' prototype + static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); + } + +Finally, we'll also let the user type in arbitrary top-level expressions +and evaluate them on the fly. We will handle this by defining anonymous +nullary (zero argument) functions for them: + +.. code-block:: c++ + + /// toplevelexpr ::= expression + static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; + } + +Now that we have all the pieces, let's build a little driver that will +let us actually *execute* this code we've built! + +The Driver +========== + +The driver for this simply invokes all of the parsing pieces with a +top-level dispatch loop. There isn't much interesting here, so I'll just +include the top-level loop. See `below <#code>`_ for full code in the +"Top-Level Parsing" section. + +.. code-block:: c++ + + /// top ::= definition | external | expression | ';' + static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. 
+ case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } + } + +The most interesting part of this is that we ignore top-level +semicolons. Why is this, you ask? The basic reason is that if you type +"4 + 5" at the command line, the parser doesn't know whether that is the +end of what you will type or not. For example, on the next line you +could type "def foo..." in which case 4+5 is the end of a top-level +expression. Alternatively you could type "\* 6", which would continue +the expression. Having top-level semicolons allows you to type "4+5;", +and the parser will know you are done. + +Conclusions +=========== + +With just under 400 lines of commented code (240 lines of non-comment, +non-blank code), we fully defined our minimal language, including a +lexer, parser, and AST builder. With this done, the executable will +validate Kaleidoscope code and tell us if it is grammatically invalid. +For example, here is a sample interaction: + +.. code-block:: bash + + $ ./a.out + ready> def foo(x y) x+foo(y, 4.0); + Parsed a function definition. + ready> def foo(x y) x+y y; + Parsed a function definition. + Parsed a top-level expr + ready> def foo(x y) x+y ); + Parsed a function definition. + Error: unknown token when expecting an expression + ready> extern sin(a); + ready> Parsed an extern + ready> ^D + $ + +There is a lot of room for extension here. You can define new AST nodes, +extend the language in many ways, etc. In the `next +installment `_, we will describe how to generate LLVM +Intermediate Representation (IR) from the AST. + +Full Code Listing +================= + +Here is the complete code listing for this and the previous chapter. +Note that it is fully self-contained: you don't need LLVM or any +external libraries at all for this. (Besides the C and C++ standard +libraries, of course.) To build this, just compile with: + +.. 
code-block:: bash + + # Compile + clang++ -g -O3 toy.cpp + # Run + ./a.out + +Here is the code: + +.. code-block:: c++ + + #include + #include + #include + #include + #include + + //===----------------------------------------------------------------------===// + // Lexer + //===----------------------------------------------------------------------===// + + // The lexer returns tokens [0-255] if it is an unknown character, otherwise one + // of these for known things. + enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 + }; + + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number + + /// gettok - Return the next token from standard input. + static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. 
+ int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; + } + + //===----------------------------------------------------------------------===// + // Abstract Syntax Tree (aka Parse Tree) + //===----------------------------------------------------------------------===// + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". + class NumberExprAST : public ExprAST { + double Val; + public: + NumberExprAST(double val) : Val(val) {} + }; + + /// VariableExprAST - Expression class for referencing a variable, like "a". + class VariableExprAST : public ExprAST { + std::string Name; + public: + VariableExprAST(const std::string &name) : Name(name) {} + }; + + /// BinaryExprAST - Expression class for a binary operator. + class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; + public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + }; + + /// CallExprAST - Expression class for function calls. + class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; + public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} + }; + + /// PrototypeAST - This class represents the "prototype" for a function, + /// which captures its name, and its argument names (thus implicitly the number + /// of arguments the function takes). + class PrototypeAST { + std::string Name; + std::vector Args; + public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + + }; + + /// FunctionAST - This class represents a function definition itself. 
+ class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; + public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + }; + + //===----------------------------------------------------------------------===// + // Parser + //===----------------------------------------------------------------------===// + + /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current + /// token the parser is looking at. getNextToken reads another token from the + /// lexer and updates CurTok with its results. + static int CurTok; + static int getNextToken() { + return CurTok = gettok(); + } + + /// BinopPrecedence - This holds the precedence for each binary operator that is + /// defined. + static std::map BinopPrecedence; + + /// GetTokPrecedence - Get the precedence of the pending binary operator token. + static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; + } + + /// Error* - These are little helper functions for error handling. + ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} + PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } + FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + + static ExprAST *ParseExpression(); + + /// identifierexpr + /// ::= identifier + /// ::= identifier '(' expression* ')' + static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. 
+ getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); + } + + /// numberexpr ::= number + static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; + } + + /// parenexpr ::= '(' expression ')' + static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; + } + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } + } + + /// binoprhs + /// ::= ('+' primary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. 
+ int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } + } + + /// expression + /// ::= primary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + + /// prototype + /// ::= id '(' id* ')' + static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); + } + + /// definition ::= 'def' prototype expression + static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; + } + + /// toplevelexpr ::= expression + static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; + } + + /// external ::= 'extern' prototype + static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. 
+ return ParsePrototype(); + } + + //===----------------------------------------------------------------------===// + // Top-Level parsing + //===----------------------------------------------------------------------===// + + static void HandleDefinition() { + if (ParseDefinition()) { + fprintf(stderr, "Parsed a function definition.\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleExtern() { + if (ParseExtern()) { + fprintf(stderr, "Parsed an extern\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (ParseTopLevelExpr()) { + fprintf(stderr, "Parsed a top-level expr\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + /// top ::= definition | external | expression | ';' + static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } + } + + //===----------------------------------------------------------------------===// + // Main driver code. + //===----------------------------------------------------------------------===// + + int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Run the main "interpreter loop" now. 
+ MainLoop(); + + return 0; + } + +`Next: Implementing Code Generation to LLVM IR `_ + diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html deleted file mode 100644 index 1390153ee7cf..000000000000 --- a/docs/tutorial/LangImpl3.html +++ /dev/null @@ -1,1268 +0,0 @@ - - - - - Kaleidoscope: Implementing code generation to LLVM IR - - - - - - - -

Kaleidoscope: Code generation to LLVM IR

- - - -
-

Written by Chris Lattner

-
- - -

Chapter 3 Introduction

- - -
- -

Welcome to Chapter 3 of the "Implementing a language -with LLVM" tutorial. This chapter shows you how to transform the Abstract Syntax Tree, built in Chapter 2, into LLVM IR. -This will teach you a little bit about how LLVM does things, as well as -demonstrate how easy it is to use. It's much more work to build a lexer and -parser than it is to generate LLVM IR code. :) -

- -

Please note: the code in this chapter and later require LLVM 2.2 or -later. LLVM 2.1 and before will not work with it. Also note that you need -to use a version of this tutorial that matches your LLVM release: If you are -using an official LLVM release, use the version of the documentation included -with your release or on the llvm.org -releases page.

- -
- - -

Code Generation Setup

- - -
- -

-In order to generate LLVM IR, we want some simple setup to get started. First -we define virtual code generation (codegen) methods in each AST class:

- -
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-...
-
-
- -

The Codegen() method says to emit IR for that AST node along with all the things it -depends on, and they all return an LLVM Value object. -"Value" is the class used to represent a "Static Single -Assignment (SSA) register" or "SSA value" in LLVM. The most distinct aspect -of SSA values is that their value is computed as the related instruction -executes, and it does not get a new value until (and if) the instruction -re-executes. In other words, there is no way to "change" an SSA value. For -more information, please read up on Static Single -Assignment - the concepts are really quite natural once you grok them.

- -

Note that instead of adding virtual methods to the ExprAST class hierarchy, -it could also make sense to use a visitor pattern or some -other way to model this. Again, this tutorial won't dwell on good software -engineering practices: for our purposes, adding a virtual method is -simplest.

- -

The -second thing we want is an "Error" method like we used for the parser, which will -be used to report errors found during code generation (for example, use of an -undeclared parameter):

- -
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-static Module *TheModule;
-static IRBuilder<> Builder(getGlobalContext());
-static std::map<std::string, Value*> NamedValues;
-
-
- -

The static variables will be used during code generation. TheModule -is the LLVM construct that contains all of the functions and global variables in -a chunk of code. In many ways, it is the top-level structure that the LLVM IR -uses to contain code.

- -

The Builder object is a helper object that makes it easy to generate -LLVM instructions. Instances of the IRBuilder -class template keep track of the current place to insert instructions and has -methods to create new instructions.

- -

The NamedValues map keeps track of which values are defined in the -current scope and what their LLVM representation is. (In other words, it is a -symbol table for the code). In this form of Kaleidoscope, the only things that -can be referenced are function parameters. As such, function parameters will -be in this map when generating code for their function body.

- -

-With these basics in place, we can start talking about how to generate code for -each expression. Note that this assumes that the Builder has been set -up to generate code into something. For now, we'll assume that this -has already been done, and we'll just use it to emit code. -

- -
- - -

Expression Code Generation

- - -
- -

Generating LLVM code for expression nodes is very straightforward: less -than 45 lines of commented code for all four of our expression nodes. First -we'll do numeric literals:

- -
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-
- -

In the LLVM IR, numeric constants are represented with the -ConstantFP class, which holds the numeric value in an APFloat -internally (APFloat has the capability of holding floating point -constants of Arbitrary Precision). This code basically just -creates and returns a ConstantFP. Note that in the LLVM IR -that constants are all uniqued together and shared. For this reason, the API -uses the "foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)".

- -
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-
- -

References to variables are also quite simple using LLVM. In the simple version -of Kaleidoscope, we assume that the variable has already been emitted somewhere -and its value is available. In practice, the only values that can be in the -NamedValues map are function arguments. This -code simply checks to see that the specified name is in the map (if not, an -unknown variable is being referenced) and returns the value for it. In future -chapters, we'll add support for loop induction -variables in the symbol table, and for local variables.

- -
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '<':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-
-
- -

Binary operators start to get more interesting. The basic idea here is that -we recursively emit code for the left-hand side of the expression, then the -right-hand side, then we compute the result of the binary expression. In this -code, we do a simple switch on the opcode to create the right LLVM instruction. -

- -

In the example above, the LLVM builder class is starting to show its value. -IRBuilder knows where to insert the newly created instruction, all you have to -do is specify what instruction to create (e.g. with CreateFAdd), which -operands to use (L and R here) and optionally provide a name -for the generated instruction.

- -

One nice thing about LLVM is that the name is just a hint. For instance, if -the code above emits multiple "addtmp" variables, LLVM will automatically -provide each one with an increasing, unique numeric suffix. Local value names -for instructions are purely optional, but it makes it much easier to read the -IR dumps.

- -

LLVM instructions are constrained by -strict rules: for example, the Left and Right operators of -an add instruction must have the same -type, and the result type of the add must match the operand types. Because -all values in Kaleidoscope are doubles, this makes for very simple code for add, -sub and mul.

- -

On the other hand, LLVM specifies that the fcmp instruction always returns an 'i1' value -(a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with -a uitofp instruction. This instruction -converts its input integer into a floating point value by treating the input -as an unsigned value. In contrast, if we used the sitofp instruction, the Kaleidoscope '<' -operator would return 0.0 and -1.0, depending on the input value.

- -
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF->arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector<Value*> ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-
- -

Code generation for function calls is quite straightforward with LLVM. The -code above initially does a function name lookup in the LLVM Module's symbol -table. Recall that the LLVM Module is the container that holds all of the -functions we are JIT'ing. By giving each function the same name as what the -user specifies, we can use the LLVM symbol table to resolve function names for -us.

- -

Once we have the function to call, we recursively codegen each argument that -is to be passed in, and create an LLVM call -instruction. Note that LLVM uses the native C calling conventions by -default, allowing these calls to also call into standard library functions like -"sin" and "cos", with no additional effort.

- -

This wraps up our handling of the four basic expressions that we have so far -in Kaleidoscope. Feel free to go in and add some more. For example, by -browsing the LLVM language reference you'll find -several other interesting instructions that are really easy to plug into our -basic framework.

- -
- - -

Function Code Generation

- - -
- -

Code generation for prototypes and functions must handle a number of -details, which make their code less beautiful than expression code -generation, but allows us to illustrate some important points. First, lets -talk about code generation for prototypes: they are used both for function -bodies and external function declarations. The code starts with:

- -
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector<Type*> Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
-
- -

This code packs a lot of power into a few lines. Note first that this -function returns a "Function*" instead of a "Value*". Because a "prototype" -really talks about the external interface for a function (not the value computed -by an expression), it makes sense for it to return the LLVM Function it -corresponds to when codegen'd.

- -

The call to FunctionType::get creates -the FunctionType that should be used for a given Prototype. Since all -function arguments in Kaleidoscope are of type double, the first line creates -a vector of "N" LLVM double types. It then uses the Functiontype::get -method to create a function type that takes "N" doubles as arguments, returns -one double as a result, and that is not vararg (the false parameter indicates -this). Note that Types in LLVM are uniqued just like Constants are, so you -don't "new" a type, you "get" it.

- -

The final line above actually creates the function that the prototype will -correspond to. This indicates the type, linkage and name to use, as well as which -module to insert into. "external linkage" -means that the function may be defined outside the current module and/or that it -is callable by functions outside the module. The Name passed in is the name the -user specified: since "TheModule" is specified, this name is registered -in "TheModule"s symbol table, which is used by the function call code -above.

- -
-
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-
-
- -

The Module symbol table works just like the Function symbol table when it -comes to name conflicts: if a new function is created with a name that was previously -added to the symbol table, the new function will get implicitly renamed when added to the -Module. The code above exploits this fact to determine if there was a previous -definition of this function.

- -

In Kaleidoscope, I choose to allow redefinitions of functions in two cases: -first, we want to allow 'extern'ing a function more than once, as long as the -prototypes for the externs match (since all arguments have the same type, we -just have to check that the number of arguments match). Second, we want to -allow 'extern'ing a function and then defining a body for it. This is useful -when defining mutually recursive functions.

- -

In order to implement this, the code above first checks to see if there is -a collision on the name of the function. If so, it deletes the function we just -created (by calling eraseFromParent) and then calling -getFunction to get the existing function with the specified name. Note -that many APIs in LLVM have "erase" forms and "remove" forms. The "remove" form -unlinks the object from its parent (e.g. a Function from a Module) and returns -it. The "erase" form unlinks the object and then deletes it.

- -
-
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-
-
- -

In order to verify the logic above, we first check to see if the pre-existing -function is "empty". In this case, empty means that it has no basic blocks in -it, which means it has no body. If it has no body, it is a forward -declaration. Since we don't allow anything after a full definition of the -function, the code rejects this case. If the previous reference to a function -was an 'extern', we simply verify that the number of arguments for that -definition and this one match up. If not, we emit an error.

- -
-
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI->setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  return F;
-}
-
-
- -

The last bit of code for prototypes loops over all of the arguments in the -function, setting the name of the LLVM Argument objects to match, and registering -the arguments in the NamedValues map for future use by the -VariableExprAST AST node. Once this is set up, it returns the Function -object to the caller. Note that we don't check for conflicting -argument names here (e.g. "extern foo(a b a)"). Doing so would be very -straight-forward with the mechanics we have already used above.

- -
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
-
-
- -

Code generation for function definitions starts out simply enough: we just -codegen the prototype (Proto) and verify that it is ok. We then clear out the -NamedValues map to make sure that there isn't anything in it from the -last function we compiled. Code generation of the prototype ensures that there -is an LLVM Function object that is ready to go for us.

- -
-
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body->Codegen()) {
-
-
- -

Now we get to the point where the Builder is set up. The first -line creates a new basic -block (named "entry"), which is inserted into TheFunction. The -second line then tells the builder that new instructions should be inserted into -the end of the new basic block. Basic blocks in LLVM are an important part -of functions that define the Control Flow Graph. -Since we don't have any control flow, our functions will only contain one -block at this point. We'll fix this in Chapter 5 :).

- -
-
-  if (Value *RetVal = Body->Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    return TheFunction;
-  }
-
-
- -

Once the insertion point is set up, we call the CodeGen() method for -the root expression of the function. If no error happens, this emits code to -compute the expression into the entry block and returns the value that was -computed. Assuming no error, we then create an LLVM ret instruction, which completes the function. -Once the function is built, we call verifyFunction, which -is provided by LLVM. This function does a variety of consistency checks on the -generated code, to determine if our compiler is doing everything right. Using -this is important: it can catch a lot of bugs. Once the function is finished -and validated, we return it.

- -
-
-  // Error reading body, remove function.
-  TheFunction->eraseFromParent();
-  return 0;
-}
-
-
- -

The only piece left here is handling of the error case. For simplicity, we -handle this by merely deleting the function we produced with the -eraseFromParent method. This allows the user to redefine a function -that they incorrectly typed in before: if we didn't delete it, it would live in -the symbol table, with a body, preventing future redefinition.

- -

This code does have a bug, though. Since the PrototypeAST::Codegen -can return a previously defined forward declaration, our code can actually delete -a forward declaration. There are a number of ways to fix this bug, see what you -can come up with! Here is a testcase:

- -
-
-extern foo(a b);     # ok, defines foo.
-def foo(a b) c;      # error, 'c' is invalid.
-def bar() foo(1, 2); # error, unknown function "foo"
-
-
- -
- - -

Driver Changes and Closing Thoughts

- - -
- -

-For now, code generation to LLVM doesn't really get us much, except that we can -look at the pretty IR calls. The sample code inserts calls to Codegen into the -"HandleDefinition", "HandleExtern" etc functions, and then -dumps out the LLVM IR. This gives a nice way to look at the LLVM IR for simple -functions. For example: -

- -
-
-ready> 4+5;
-Read top-level expression:
-define double @0() {
-entry:
-  ret double 9.000000e+00
-}
-
-
- -

Note how the parser turns the top-level expression into anonymous functions -for us. This will be handy when we add JIT -support in the next chapter. Also note that the code is very literally -transcribed, no optimizations are being performed except simple constant -folding done by IRBuilder. We will -add optimizations explicitly in -the next chapter.

- -
-
-ready> def foo(a b) a*a + 2*a*b + b*b;
-Read function definition:
-define double @foo(double %a, double %b) {
-entry:
-  %multmp = fmul double %a, %a
-  %multmp1 = fmul double 2.000000e+00, %a
-  %multmp2 = fmul double %multmp1, %b
-  %addtmp = fadd double %multmp, %multmp2
-  %multmp3 = fmul double %b, %b
-  %addtmp4 = fadd double %addtmp, %multmp3
-  ret double %addtmp4
-}
-
-
- -

This shows some simple arithmetic. Notice the striking similarity to the -LLVM builder calls that we use to create the instructions.

- -
-
-ready> def bar(a) foo(a, 4.0) + bar(31337);
-Read function definition:
-define double @bar(double %a) {
-entry:
-  %calltmp = call double @foo(double %a, double 4.000000e+00)
-  %calltmp1 = call double @bar(double 3.133700e+04)
-  %addtmp = fadd double %calltmp, %calltmp1
-  ret double %addtmp
-}
-
-
- -

This shows some function calls. Note that this function will take a long -time to execute if you call it. In the future we'll add conditional control -flow to actually make recursion useful :).

- -
-
-ready> extern cos(x);
-Read extern: 
-declare double @cos(double)
-
-ready> cos(1.234);
-Read top-level expression:
-define double @1() {
-entry:
-  %calltmp = call double @cos(double 1.234000e+00)
-  ret double %calltmp
-}
-
-
- -

This shows an extern for the libm "cos" function, and a call to it.

- - -
-
-ready> ^D
-; ModuleID = 'my cool jit'
-
-define double @0() {
-entry:
-  %addtmp = fadd double 4.000000e+00, 5.000000e+00
-  ret double %addtmp
-}
-
-define double @foo(double %a, double %b) {
-entry:
-  %multmp = fmul double %a, %a
-  %multmp1 = fmul double 2.000000e+00, %a
-  %multmp2 = fmul double %multmp1, %b
-  %addtmp = fadd double %multmp, %multmp2
-  %multmp3 = fmul double %b, %b
-  %addtmp4 = fadd double %addtmp, %multmp3
-  ret double %addtmp4
-}
-
-define double @bar(double %a) {
-entry:
-  %calltmp = call double @foo(double %a, double 4.000000e+00)
-  %calltmp1 = call double @bar(double 3.133700e+04)
-  %addtmp = fadd double %calltmp, %calltmp1
-  ret double %addtmp
-}
-
-declare double @cos(double)
-
-define double @1() {
-entry:
-  %calltmp = call double @cos(double 1.234000e+00)
-  ret double %calltmp
-}
-
-
- -

When you quit the current demo, it dumps out the IR for the entire module -generated. Here you can see the big picture with all the functions referencing -each other.

- -

This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll -describe how to add JIT codegen and optimizer -support to this so we can actually start running code!

- -
- - - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with the -LLVM code generator. Because this uses the LLVM libraries, we need to link -them in. To do this, we use the llvm-config tool to inform -our makefile/command line about which options to use:

- -
-
-# Compile
-clang++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy
-# Run
-./toy
-
-
- -

Here is the code:

- -
-
-// To build this:
-// See example below.
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/Verifier.h"
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector<ExprAST*> Args;
-public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
-    : Name(name), Args(args) {}
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map<char, int> BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec <= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector<ExprAST*> Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec < ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder<> Builder(getGlobalContext());
-static std::map<std::string, Value*> NamedValues;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '<':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF->arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector<Value*> ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector<Type*> Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI->setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body->Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction->eraseFromParent();
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      fprintf(stderr, "Read top-level expression:");
-      LF->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready> ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  LLVMContext &Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['<'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready> ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
-  return 0;
-}
-
-
-Next: Adding JIT and Optimizer Support -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-06-29 14:38:19 +0200 (Fri, 29 Jun 2012) $ -
- - diff --git a/docs/tutorial/LangImpl3.rst b/docs/tutorial/LangImpl3.rst new file mode 100644 index 000000000000..9d5f90839edc --- /dev/null +++ b/docs/tutorial/LangImpl3.rst @@ -0,0 +1,1160 @@ +======================================== +Kaleidoscope: Code generation to LLVM IR +======================================== + +.. contents:: + :local: + +Chapter 3 Introduction +====================== + +Welcome to Chapter 3 of the "`Implementing a language with +LLVM `_" tutorial. This chapter shows you how to transform +the `Abstract Syntax Tree `_, built in Chapter 2, into +LLVM IR. This will teach you a little bit about how LLVM does things, as +well as demonstrate how easy it is to use. It's much more work to build +a lexer and parser than it is to generate LLVM IR code. :) + +**Please note**: the code in this chapter and later require LLVM 2.2 or +later. LLVM 2.1 and before will not work with it. Also note that you +need to use a version of this tutorial that matches your LLVM release: +If you are using an official LLVM release, use the version of the +documentation included with your release or on the `llvm.org releases +page `_. + +Code Generation Setup +===================== + +In order to generate LLVM IR, we want some simple setup to get started. +First we define virtual code generation (codegen) methods in each AST +class: + +.. code-block:: c++ + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". + class NumberExprAST : public ExprAST { + double Val; + public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); + }; + ... + +The Codegen() method says to emit IR for that AST node along with all +the things it depends on, and they all return an LLVM Value object. +"Value" is the class used to represent a "`Static Single Assignment +(SSA) `_ +register" or "SSA value" in LLVM. 
The most distinct aspect of SSA values +is that their value is computed as the related instruction executes, and +it does not get a new value until (and if) the instruction re-executes. +In other words, there is no way to "change" an SSA value. For more +information, please read up on `Static Single +Assignment `_ +- the concepts are really quite natural once you grok them. + +Note that instead of adding virtual methods to the ExprAST class +hierarchy, it could also make sense to use a `visitor +pattern `_ or some other +way to model this. Again, this tutorial won't dwell on good software +engineering practices: for our purposes, adding a virtual method is +simplest. + +The second thing we want is an "Error" method like we used for the +parser, which will be used to report errors found during code generation +(for example, use of an undeclared parameter): + +.. code-block:: c++ + + Value *ErrorV(const char *Str) { Error(Str); return 0; } + + static Module *TheModule; + static IRBuilder<> Builder(getGlobalContext()); + static std::map<std::string, Value*> NamedValues; + +The static variables will be used during code generation. ``TheModule`` +is the LLVM construct that contains all of the functions and global +variables in a chunk of code. In many ways, it is the top-level +structure that the LLVM IR uses to contain code. + +The ``Builder`` object is a helper object that makes it easy to generate +LLVM instructions. Instances of the +```IRBuilder`` `_ +class template keep track of the current place to insert instructions +and have methods to create new instructions. + +The ``NamedValues`` map keeps track of which values are defined in the +current scope and what their LLVM representation is. (In other words, it +is a symbol table for the code). In this form of Kaleidoscope, the only +things that can be referenced are function parameters. As such, function +parameters will be in this map when generating code for their function +body. 
+ +With these basics in place, we can start talking about how to generate +code for each expression. Note that this assumes that the ``Builder`` +has been set up to generate code *into* something. For now, we'll assume +that this has already been done, and we'll just use it to emit code. + +Expression Code Generation +========================== + +Generating LLVM code for expression nodes is very straightforward: less +than 45 lines of commented code for all four of our expression nodes. +First we'll do numeric literals: + +.. code-block:: c++ + + Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); + } + +In the LLVM IR, numeric constants are represented with the +``ConstantFP`` class, which holds the numeric value in an ``APFloat`` +internally (``APFloat`` has the capability of holding floating point +constants of Arbitrary Precision). This code basically just creates +and returns a ``ConstantFP``. Note that in the LLVM IR, constants +are all uniqued together and shared. For this reason, the API uses the +"foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)". + +.. code-block:: c++ + + Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); + } + +References to variables are also quite simple using LLVM. In the simple +version of Kaleidoscope, we assume that the variable has already been +emitted somewhere and its value is available. In practice, the only +values that can be in the ``NamedValues`` map are function arguments. +This code simply checks to see that the specified name is in the map (if +not, an unknown variable is being referenced) and returns the value for +it. In future chapters, we'll add support for `loop induction +variables `_ in the symbol table, and for `local +variables `_. + +.. 
code-block:: c++ + + Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } + } + +Binary operators start to get more interesting. The basic idea here is +that we recursively emit code for the left-hand side of the expression, +then the right-hand side, then we compute the result of the binary +expression. In this code, we do a simple switch on the opcode to create +the right LLVM instruction. + +In the example above, the LLVM builder class is starting to show its +value. IRBuilder knows where to insert the newly created instruction; +all you have to do is specify what instruction to create (e.g. with +``CreateFAdd``), which operands to use (``L`` and ``R`` here) and +optionally provide a name for the generated instruction. + +One nice thing about LLVM is that the name is just a hint. For instance, +if the code above emits multiple "addtmp" variables, LLVM will +automatically provide each one with an increasing, unique numeric +suffix. Local value names for instructions are purely optional, but it +makes it much easier to read the IR dumps. + +`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict +rules: for example, the Left and Right operands of an `add +instruction <../LangRef.html#i_add>`_ must have the same type, and the +result type of the add must match the operand types. Because all values +in Kaleidoscope are doubles, this makes for very simple code for add, +sub and mul.
+ +On the other hand, LLVM specifies that the `fcmp +instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a +one bit integer). The problem with this is that Kaleidoscope wants the +value to be a 0.0 or 1.0 value. In order to get these semantics, we +combine the fcmp instruction with a `uitofp +instruction <../LangRef.html#i_uitofp>`_. This instruction converts its +input integer into a floating point value by treating the input as an +unsigned value. In contrast, if we used the `sitofp +instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator +would return 0.0 and -1.0, depending on the input value. + +.. code-block:: c++ + + Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); + } + +Code generation for function calls is quite straightforward with LLVM. +The code above initially does a function name lookup in the LLVM +Module's symbol table. Recall that the LLVM Module is the container that +holds all of the functions we are JIT'ing. By giving each function the +same name as what the user specifies, we can use the LLVM symbol table +to resolve function names for us. + +Once we have the function to call, we recursively codegen each argument +that is to be passed in, and create an LLVM `call +instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C +calling conventions by default, allowing these calls to also call into +standard library functions like "sin" and "cos", with no additional +effort. 
+ +This wraps up our handling of the four basic expressions that we have so +far in Kaleidoscope. Feel free to go in and add some more. For example, +by browsing the `LLVM language reference <../LangRef.html>`_ you'll find +several other interesting instructions that are really easy to plug into +our basic framework. + +Function Code Generation +======================== + +Code generation for prototypes and functions must handle a number of +details, which make their code less beautiful than expression code +generation, but allow us to illustrate some important points. First, +let's talk about code generation for prototypes: they are used both for +function bodies and external function declarations. The code starts +with: + +.. code-block:: c++ + + Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + +This code packs a lot of power into a few lines. Note first that this +function returns a "Function\*" instead of a "Value\*". Because a +"prototype" really talks about the external interface for a function +(not the value computed by an expression), it makes sense for it to +return the LLVM Function it corresponds to when codegen'd. + +The call to ``FunctionType::get`` creates the ``FunctionType`` that +should be used for a given Prototype. Since all function arguments in +Kaleidoscope are of type double, the first line creates a vector of "N" +LLVM double types. It then uses the ``FunctionType::get`` method to +create a function type that takes "N" doubles as arguments, returns one +double as a result, and that is not vararg (the false parameter +indicates this). Note that Types in LLVM are uniqued just like Constants +are, so you don't "new" a type, you "get" it.
+ +The final line above actually creates the function that the prototype +will correspond to. This indicates the type, linkage and name to use, as +well as which module to insert into. "`external +linkage <../LangRef.html#linkage>`_" means that the function may be +defined outside the current module and/or that it is callable by +functions outside the module. The Name passed in is the name the user +specified: since "``TheModule``" is specified, this name is registered +in the symbol table of "``TheModule``", which is used by the function call +code above. + +.. code-block:: c++ + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + +The Module symbol table works just like the Function symbol table when +it comes to name conflicts: if a new function is created with a name +that was previously added to the symbol table, the new function will get +implicitly renamed when added to the Module. The code above exploits +this fact to determine if there was a previous definition of this +function. + +In Kaleidoscope, I choose to allow redefinitions of functions in two +cases: first, we want to allow 'extern'ing a function more than once, as +long as the prototypes for the externs match (since all arguments have +the same type, we just have to check that the number of arguments +match). Second, we want to allow 'extern'ing a function and then +defining a body for it. This is useful when defining mutually recursive +functions. + +In order to implement this, the code above first checks to see if there +is a collision on the name of the function. If so, it deletes the +function we just created (by calling ``eraseFromParent``) and then +calls ``getFunction`` to get the existing function with the specified +name.
Note that many APIs in LLVM have "erase" forms and "remove" forms. +The "remove" form unlinks the object from its parent (e.g. a Function +from a Module) and returns it. The "erase" form unlinks the object and +then deletes it. + +.. code-block:: c++ + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + +In order to verify the logic above, we first check to see if the +pre-existing function is "empty". In this case, empty means that it has +no basic blocks in it, which means it has no body. If it has no body, it +is a forward declaration. Since we don't allow anything after a full +definition of the function, the code rejects this case. If the previous +reference to a function was an 'extern', we simply verify that the +number of arguments for that definition and this one match up. If not, +we emit an error. + +.. code-block:: c++ + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + return F; + } + +The last bit of code for prototypes loops over all of the arguments in +the function, setting the name of the LLVM Argument objects to match, +and registering the arguments in the ``NamedValues`` map for future use +by the ``VariableExprAST`` AST node. Once this is set up, it returns the +Function object to the caller. Note that we don't check for conflicting +argument names here (e.g. "extern foo(a b a)"). Doing so would be very +straight-forward with the mechanics we have already used above. + +.. 
code-block:: c++ + + Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + +Code generation for function definitions starts out simply enough: we +first clear out the ``NamedValues`` map to make sure that there isn't +anything in it from the last function we compiled, then codegen the +prototype (Proto) and verify that it is ok. Code generation of the +prototype ensures that there is an LLVM Function object that is ready to +go for us. + +.. code-block:: c++ + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + +Now we get to the point where the ``Builder`` is set up. The first line +creates a new `basic block `_ +(named "entry"), which is inserted into ``TheFunction``. The second line +then tells the builder that new instructions should be inserted into the +end of the new basic block. Basic blocks in LLVM are an important part +of functions that define the `Control Flow +Graph `_. Since we +don't have any control flow, our functions will only contain one block +at this point. We'll fix this in `Chapter 5 `_ :). + +.. code-block:: c++ + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + +Once the insertion point is set up, we call the ``Codegen()`` method for +the root expression of the function. If no error happens, this emits +code to compute the expression into the entry block and returns the +value that was computed. Assuming no error, we then create an LLVM `ret +instruction <../LangRef.html#i_ret>`_, which completes the function. +Once the function is built, we call ``verifyFunction``, which is +provided by LLVM.
This function does a variety of consistency checks on +the generated code, to determine if our compiler is doing everything +right. Using this is important: it can catch a lot of bugs. Once the +function is finished and validated, we return it. + +.. code-block:: c++ + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; + } + +The only piece left here is handling of the error case. For simplicity, +we handle this by merely deleting the function we produced with the +``eraseFromParent`` method. This allows the user to redefine a function +that they incorrectly typed in before: if we didn't delete it, it would +live in the symbol table, with a body, preventing future redefinition. + +This code does have a bug, though. Since the ``PrototypeAST::Codegen`` +can return a previously defined forward declaration, our code can +actually delete a forward declaration. There are a number of ways to fix +this bug, see what you can come up with! Here is a testcase: + +:: + + extern foo(a b); # ok, defines foo. + def foo(a b) c; # error, 'c' is invalid. + def bar() foo(1, 2); # error, unknown function "foo" + +Driver Changes and Closing Thoughts +=================================== + +For now, code generation to LLVM doesn't really get us much, except that +we can look at the pretty IR calls. The sample code inserts calls to +Codegen into the "``HandleDefinition``", "``HandleExtern``" etc +functions, and then dumps out the LLVM IR. This gives a nice way to look +at the LLVM IR for simple functions. For example: + +:: + + ready> 4+5; + Read top-level expression: + define double @0() { + entry: + ret double 9.000000e+00 + } + +Note how the parser turns the top-level expression into anonymous +functions for us. This will be handy when we add `JIT +support `_ in the next chapter. Also note that the +code is very literally transcribed, no optimizations are being performed +except simple constant folding done by IRBuilder. 
We will `add +optimizations `_ explicitly in the next +chapter. + +:: + + ready> def foo(a b) a*a + 2*a*b + b*b; + Read function definition: + define double @foo(double %a, double %b) { + entry: + %multmp = fmul double %a, %a + %multmp1 = fmul double 2.000000e+00, %a + %multmp2 = fmul double %multmp1, %b + %addtmp = fadd double %multmp, %multmp2 + %multmp3 = fmul double %b, %b + %addtmp4 = fadd double %addtmp, %multmp3 + ret double %addtmp4 + } + +This shows some simple arithmetic. Notice the striking similarity to the +LLVM builder calls that we use to create the instructions. + +:: + + ready> def bar(a) foo(a, 4.0) + bar(31337); + Read function definition: + define double @bar(double %a) { + entry: + %calltmp = call double @foo(double %a, double 4.000000e+00) + %calltmp1 = call double @bar(double 3.133700e+04) + %addtmp = fadd double %calltmp, %calltmp1 + ret double %addtmp + } + +This shows some function calls. Note that this function will take a long +time to execute if you call it. In the future we'll add conditional +control flow to actually make recursion useful :). + +:: + + ready> extern cos(x); + Read extern: + declare double @cos(double) + + ready> cos(1.234); + Read top-level expression: + define double @1() { + entry: + %calltmp = call double @cos(double 1.234000e+00) + ret double %calltmp + } + +This shows an extern for the libm "cos" function, and a call to it. + +.. TODO:: Abandon Pygments' horrible `llvm` lexer. It just totally gives up + on highlighting this due to the first line. 
+ +:: + + ready> ^D + ; ModuleID = 'my cool jit' + + define double @0() { + entry: + %addtmp = fadd double 4.000000e+00, 5.000000e+00 + ret double %addtmp + } + + define double @foo(double %a, double %b) { + entry: + %multmp = fmul double %a, %a + %multmp1 = fmul double 2.000000e+00, %a + %multmp2 = fmul double %multmp1, %b + %addtmp = fadd double %multmp, %multmp2 + %multmp3 = fmul double %b, %b + %addtmp4 = fadd double %addtmp, %multmp3 + ret double %addtmp4 + } + + define double @bar(double %a) { + entry: + %calltmp = call double @foo(double %a, double 4.000000e+00) + %calltmp1 = call double @bar(double 3.133700e+04) + %addtmp = fadd double %calltmp, %calltmp1 + ret double %addtmp + } + + declare double @cos(double) + + define double @1() { + entry: + %calltmp = call double @cos(double 1.234000e+00) + ret double %calltmp + } + +When you quit the current demo, it dumps out the IR for the entire +module generated. Here you can see the big picture with all the +functions referencing each other. + +This wraps up the third chapter of the Kaleidoscope tutorial. Up next, +we'll describe how to `add JIT codegen and optimizer +support `_ to this so we can actually start running +code! + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +the LLVM code generator. Because this uses the LLVM libraries, we need +to link them in. To do this, we use the +`llvm-config `_ tool to inform +our makefile/command line about which options to use: + +.. code-block:: bash + + # Compile + clang++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy + # Run + ./toy + +Here is the code: + +.. code-block:: c++ + + // To build this: + // See example below. 
+ + #include "llvm/DerivedTypes.h" + #include "llvm/IRBuilder.h" + #include "llvm/LLVMContext.h" + #include "llvm/Module.h" + #include "llvm/Analysis/Verifier.h" + #include + #include + #include + #include + using namespace llvm; + + //===----------------------------------------------------------------------===// + // Lexer + //===----------------------------------------------------------------------===// + + // The lexer returns tokens [0-255] if it is an unknown character, otherwise one + // of these for known things. + enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 + }; + + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number + + /// gettok - Return the next token from standard input. + static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. 
+ int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; + } + + //===----------------------------------------------------------------------===// + // Abstract Syntax Tree (aka Parse Tree) + //===----------------------------------------------------------------------===// + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". + class NumberExprAST : public ExprAST { + double Val; + public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); + }; + + /// VariableExprAST - Expression class for referencing a variable, like "a". + class VariableExprAST : public ExprAST { + std::string Name; + public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); + }; + + /// BinaryExprAST - Expression class for a binary operator. + class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; + public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); + }; + + /// CallExprAST - Expression class for function calls. + class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; + public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); + }; + + /// PrototypeAST - This class represents the "prototype" for a function, + /// which captures its name, and its argument names (thus implicitly the number + /// of arguments the function takes). + class PrototypeAST { + std::string Name; + std::vector Args; + public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + + Function *Codegen(); + }; + + /// FunctionAST - This class represents a function definition itself. 
+ class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; + public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); + }; + + //===----------------------------------------------------------------------===// + // Parser + //===----------------------------------------------------------------------===// + + /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current + /// token the parser is looking at. getNextToken reads another token from the + /// lexer and updates CurTok with its results. + static int CurTok; + static int getNextToken() { + return CurTok = gettok(); + } + + /// BinopPrecedence - This holds the precedence for each binary operator that is + /// defined. + static std::map BinopPrecedence; + + /// GetTokPrecedence - Get the precedence of the pending binary operator token. + static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; + } + + /// Error* - These are little helper functions for error handling. + ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} + PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } + FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + + static ExprAST *ParseExpression(); + + /// identifierexpr + /// ::= identifier + /// ::= identifier '(' expression* ')' + static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. 
+ getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); + } + + /// numberexpr ::= number + static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; + } + + /// parenexpr ::= '(' expression ')' + static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; + } + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } + } + + /// binoprhs + /// ::= ('+' primary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. 
+ int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } + } + + /// expression + /// ::= primary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + + /// prototype + /// ::= id '(' id* ')' + static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); + } + + /// definition ::= 'def' prototype expression + static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; + } + + /// toplevelexpr ::= expression + static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; + } + + /// external ::= 'extern' prototype + static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. 
+ return ParsePrototype(); + } + + //===----------------------------------------------------------------------===// + // Code Generation + //===----------------------------------------------------------------------===// + + static Module *TheModule; + static IRBuilder<> Builder(getGlobalContext()); + static std::map NamedValues; + + Value *ErrorV(const char *Str) { Error(Str); return 0; } + + Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); + } + + Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); + } + + Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } + } + + Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); + } + + Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. 
+ std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; + } + + Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. 
+ TheFunction->eraseFromParent(); + return 0; + } + + //===----------------------------------------------------------------------===// + // Top-Level parsing and JIT Driver + //===----------------------------------------------------------------------===// + + static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read top-level expression:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + /// top ::= definition | external | expression | ';' + static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } + } + + //===----------------------------------------------------------------------===// + // "Library" functions that can be "extern'd" from user code. + //===----------------------------------------------------------------------===// + + /// putchard - putchar that takes a double and returns 0. + extern "C" + double putchard(double X) { + putchar((char)X); + return 0; + } + + //===----------------------------------------------------------------------===// + // Main driver code. 
+ //===----------------------------------------------------------------------===// + + int main() { + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Print out all of the generated code. + TheModule->dump(); + + return 0; + } + +`Next: Adding JIT and Optimizer Support `_ + diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html deleted file mode 100644 index 5e9c65676c9e..000000000000 --- a/docs/tutorial/LangImpl4.html +++ /dev/null @@ -1,1152 +0,0 @@ - - - - - Kaleidoscope: Adding JIT and Optimizer Support - - - - - - - -

Kaleidoscope: Adding JIT and Optimizer Support

- - - -
-

Written by Chris Lattner

-
- - -

Chapter 4 Introduction

- - -
- -

Welcome to Chapter 4 of the "Implementing a language -with LLVM" tutorial. Chapters 1-3 described the implementation of a simple -language and added support for generating LLVM IR. This chapter describes -two new techniques: adding optimizer support to your language, and adding JIT -compiler support. These additions will demonstrate how to get nice, efficient code -for the Kaleidoscope language.

- -
- - -

Trivial Constant Folding

- - -
- -

-Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately, -it does not produce wonderful code. The IRBuilder, however, does give us -obvious optimizations when compiling simple code:

- -
-
-ready> def test(x) 1+2+x;
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        ret double %addtmp
-}
-
-
- -

This code is not a literal transcription of the AST built by parsing the -input. That would be: - -

-
-ready> def test(x) 1+2+x;
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 2.000000e+00, 1.000000e+00
-        %addtmp1 = fadd double %addtmp, %x
-        ret double %addtmp1
-}
-
-
- -

Constant folding, as seen above, in particular, is a very common and very -important optimization: so much so that many language implementors implement -constant folding support in their AST representation.

- -

With LLVM, you don't need this support in the AST. Since all calls to build -LLVM IR go through the LLVM IR builder, the builder itself checks to see if -there is a constant folding opportunity when you call it. If so, it just does -the constant fold and returns the constant instead of creating an instruction. - -

Well, that was easy :). In practice, we recommend always using -IRBuilder when generating code like this. It has no -"syntactic overhead" for its use (you don't have to uglify your compiler with -constant checks everywhere) and it can dramatically reduce the amount of -LLVM IR that is generated in some cases (particularly for languages with a macro -preprocessor or that use a lot of constants).

- -

On the other hand, the IRBuilder is limited by the fact -that it does all of its analysis inline with the code as it is built. If you -take a slightly more complex example:

- -
-
-ready> def test(x) (1+2+x)*(x+(1+2));
-ready> Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        %addtmp1 = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp1
-        ret double %multmp
-}
-
-
- -

In this case, the LHS and RHS of the multiplication are the same value. We'd -really like to see this generate "tmp = x+3; result = tmp*tmp;" instead -of computing "x+3" twice.

- -

Unfortunately, no amount of local analysis will be able to detect and correct -this. This requires two transformations: reassociation of expressions (to -make the add's lexically identical) and Common Subexpression Elimination (CSE) -to delete the redundant add instruction. Fortunately, LLVM provides a broad -range of optimizations that you can use, in the form of "passes".

- -
- - -

LLVM Optimization Passes

- - -
- -

LLVM provides many optimization passes, which do many different sorts of -things and have different tradeoffs. Unlike other systems, LLVM doesn't hold -to the mistaken notion that one set of optimizations is right for all languages -and for all situations. LLVM allows a compiler implementor to make complete -decisions about what optimizations to use, in which order, and in what -situation.

- -

As a concrete example, LLVM supports both "whole module" passes, which look -across as large a body of code as they can (often a whole file, but if run -at link time, this can be a substantial portion of the whole program). It also -supports and includes "per-function" passes which just operate on a single -function at a time, without looking at other functions. For more information -on passes and how they are run, see the How -to Write a Pass document and the List of LLVM -Passes.

- -

For Kaleidoscope, we are currently generating functions on the fly, one at -a time, as the user types them in. We aren't shooting for the ultimate -optimization experience in this setting, but we also want to catch the easy and -quick stuff where possible. As such, we will choose to run a few per-function -optimizations as the user types the function in. If we wanted to make a "static -Kaleidoscope compiler", we would use exactly the code we have now, except that -we would defer running the optimizer until the entire file has been parsed.

- -

In order to get per-function optimizations going, we need to set up a -FunctionPassManager to hold and -organize the LLVM optimizations that we want to run. Once we have that, we can -add a set of optimizations to run. The code looks like this:

- -
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-
- -

This code defines a FunctionPassManager, "OurFPM". It -requires a pointer to the Module to construct itself. Once it is set -up, we use a series of "add" calls to add a bunch of LLVM passes. The first -pass is basically boilerplate, it adds a pass so that later optimizations know -how the data structures in the program are laid out. The -"TheExecutionEngine" variable is related to the JIT, which we will get -to in the next section.

- -

In this case, we choose to add 4 optimization passes. The passes we chose -here are a pretty standard set of "cleanup" optimizations that are useful for -a wide variety of code. I won't delve into what they do but, believe me, -they are a good starting place :).

- -

Once the PassManager is set up, we need to make use of it. We do this by -running it after our newly created function is constructed (in -FunctionAST::Codegen), but before it is returned to the client:

- -
-
-  if (Value *RetVal = Body->Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM->run(*TheFunction);
-    
-    return TheFunction;
-  }
-
-
- -

As you can see, this is pretty straightforward. The -FunctionPassManager optimizes and updates the LLVM Function* in place, -improving (hopefully) its body. With this in place, we can try our test above -again:

- -
-
-ready> def test(x) (1+2+x)*(x+(1+2));
-ready> Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp
-        ret double %multmp
-}
-
-
- -

As expected, we now get our nicely optimized code, saving a floating point -add instruction from every execution of this function.

- -

LLVM provides a wide variety of optimizations that can be used in certain -circumstances. Some documentation about the various -passes is available, but it isn't very complete. Another good source of -ideas can come from looking at the passes that Clang runs to get -started. The "opt" tool allows you to experiment with passes from the -command line, so you can see if they do anything.

- -

Now that we have reasonable code coming out of our front-end, let's talk about -executing it!

- -
- - -

Adding a JIT Compiler

- - -
- -

Code that is available in LLVM IR can have a wide variety of tools -applied to it. For example, you can run optimizations on it (as we did above), -you can dump it out in textual or binary forms, you can compile the code to an -assembly file (.s) for some target, or you can JIT compile it. The nice thing -about the LLVM IR representation is that it is the "common currency" between -many different parts of the compiler. -

- -

In this section, we'll add JIT compiler support to our interpreter. The -basic idea that we want for Kaleidoscope is to have the user enter function -bodies as they do now, but immediately evaluate the top-level expressions they -type in. For example, if they type in "1 + 2;", we should evaluate and print -out 3. If they define a function, they should be able to call it from the -command line.

- -

In order to do this, we first declare and initialize the JIT. This is done -by adding a global variable and a call in main:

- -
-
-static ExecutionEngine *TheExecutionEngine;
-...
-int main() {
-  ..
-  // Create the JIT.  This takes ownership of the module.
-  TheExecutionEngine = EngineBuilder(TheModule).create();
-  ..
-}
-
-
- -

This creates an abstract "Execution Engine" which can be either a JIT -compiler or the LLVM interpreter. LLVM will automatically pick a JIT compiler -for you if one is available for your platform, otherwise it will fall back to -the interpreter.

- -

Once the ExecutionEngine is created, the JIT is ready to be used. -There are a variety of APIs that are useful, but the simplest one is the -"getPointerToFunction(F)" method. This method JIT compiles the -specified LLVM Function and returns a function pointer to the generated machine -code. In our case, this means that we can change the code that parses a -top-level expression to look like this:

- -
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      LF->dump();  // Dump the function for exposition purposes.
-    
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-
-
- -

Recall that we compile top-level expressions into a self-contained LLVM -function that takes no arguments and returns the computed double. Because the -LLVM JIT compiler matches the native platform ABI, this means that you can just -cast the result pointer to a function pointer of that type and call it directly. -This means, there is no difference between JIT compiled code and native machine -code that is statically linked into your application.

- -

With just these two changes, let's see how Kaleidoscope works now!

- -
-
-ready> 4+5;
-Read top-level expression:
-define double @0() {
-entry:
-  ret double 9.000000e+00
-}
-
-Evaluated to 9.000000
-
-
- -

Well this looks like it is basically working. The dump of the function -shows the "no argument function that always returns double" that we synthesize -for each top-level expression that is typed in. This demonstrates very basic -functionality, but can we do more?

- -
-
-ready> def testfunc(x y) x + y*2;  
-Read function definition:
-define double @testfunc(double %x, double %y) {
-entry:
-  %multmp = fmul double %y, 2.000000e+00
-  %addtmp = fadd double %multmp, %x
-  ret double %addtmp
-}
-
-ready> testfunc(4, 10);
-Read top-level expression:
-define double @1() {
-entry:
-  %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
-  ret double %calltmp
-}
-
-Evaluated to 24.000000
-
-
- -

This illustrates that we can now call user code, but there is something a bit -subtle going on here. Note that we only invoke the JIT on the anonymous -functions that call testfunc, but we never invoked it -on testfunc itself. What actually happened here is that the JIT -scanned for all non-JIT'd functions transitively called from the anonymous -function and compiled all of them before returning -from getPointerToFunction().

- -

The JIT provides a number of other more advanced interfaces for things like -freeing allocated machine code, rejit'ing functions to update them, etc. -However, even with this simple code, we get some surprisingly powerful -capabilities - check this out (I removed the dump of the anonymous functions, -you should get the idea by now :) :

- -
-
-ready> extern sin(x);
-Read extern: 
-declare double @sin(double)
-
-ready> extern cos(x);
-Read extern: 
-declare double @cos(double)
-
-ready> sin(1.0);
-Read top-level expression:
-define double @2() {
-entry:
-  ret double 0x3FEAED548F090CEE
-}
-
-Evaluated to 0.841471
-
-ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
-Read function definition:
-define double @foo(double %x) {
-entry:
-  %calltmp = call double @sin(double %x)
-  %multmp = fmul double %calltmp, %calltmp
-  %calltmp2 = call double @cos(double %x)
-  %multmp4 = fmul double %calltmp2, %calltmp2
-  %addtmp = fadd double %multmp, %multmp4
-  ret double %addtmp
-}
-
-ready> foo(4.0);
-Read top-level expression:
-define double @3() {
-entry:
-  %calltmp = call double @foo(double 4.000000e+00)
-  ret double %calltmp
-}
-
-Evaluated to 1.000000
-
-
- -

Whoa, how does the JIT know about sin and cos? The answer is surprisingly -simple: in this -example, the JIT started execution of a function and got to a function call. It -realized that the function was not yet JIT compiled and invoked the standard set -of routines to resolve the function. In this case, there is no body defined -for the function, so the JIT ended up calling "dlsym("sin")" on the -Kaleidoscope process itself. -Since "sin" is defined within the JIT's address space, it simply -patches up calls in the module to call the libm version of sin -directly.

- -

The LLVM JIT provides a number of interfaces (look in the -ExecutionEngine.h file) for controlling how unknown functions get -resolved. It allows you to establish explicit mappings between IR objects and -addresses (useful for LLVM global variables that you want to map to static -tables, for example), allows you to dynamically decide on the fly based on the -function name, and even allows you to have the JIT compile functions lazily the -first time they're called.

- -

One interesting application of this is that we can now extend the language -by writing arbitrary C++ code to implement operations. For example, if we add: -

- -
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-
- -

Now we can produce simple output to the console by using things like: -"extern putchard(x); putchard(120);", which prints a lowercase 'x' on -the console (120 is the ASCII code for 'x'). Similar code could be used to -implement file I/O, console input, and many other capabilities in -Kaleidoscope.

- -

This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At -this point, we can compile a non-Turing-complete programming language, optimize -and JIT compile it in a user-driven way. Next up we'll look into extending the language with control flow constructs, -tackling some interesting LLVM IR issues along the way.

- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with the -LLVM JIT and optimizer. To build this example, use: -

- -
-
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-
-
- -

-If you are compiling this on Linux, make sure to add the "-rdynamic" option -as well. This makes sure that the external functions are resolved properly -at runtime.

- -

Here is the code:

- -
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector<ExprAST*> Args;
-public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
-    : Name(name), Args(args) {}
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map<char, int> BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec <= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector<ExprAST*> Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec < ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder<> Builder(getGlobalContext());
-static std::map<std::string, Value*> NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '<':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF->arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector<Value*> ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector<Type*> Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI->setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body->Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM->run(*TheFunction);
-    
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction->eraseFromParent();
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      fprintf(stderr, "Read top-level expression:");
-      LF->dump();
-
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready> ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['<'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready> ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
-  return 0;
-}
-
-
- -Next: Extending the language: control flow -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/LangImpl4.rst b/docs/tutorial/LangImpl4.rst new file mode 100644 index 000000000000..96c06d124ef1 --- /dev/null +++ b/docs/tutorial/LangImpl4.rst @@ -0,0 +1,1061 @@ +============================================== +Kaleidoscope: Adding JIT and Optimizer Support +============================================== + +.. contents:: + :local: + +Chapter 4 Introduction +====================== + +Welcome to Chapter 4 of the "`Implementing a language with +LLVM <index.html>`_" tutorial. Chapters 1-3 described the implementation +of a simple language and added support for generating LLVM IR. This +chapter describes two new techniques: adding optimizer support to your +language, and adding JIT compiler support. These additions will +demonstrate how to get nice, efficient code for the Kaleidoscope +language. + +Trivial Constant Folding +======================== + +Our demonstration for Chapter 3 is elegant and easy to extend. +Unfortunately, it does not produce wonderful code. The IRBuilder, +however, does give us obvious optimizations when compiling simple code: + +:: + + ready> def test(x) 1+2+x; + Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double 3.000000e+00, %x + ret double %addtmp + } + +This code is not a literal transcription of the AST built by parsing the +input. That would be: + +:: + + ready> def test(x) 1+2+x; + Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double 2.000000e+00, 1.000000e+00 + %addtmp1 = fadd double %addtmp, %x + ret double %addtmp1 + } + +Constant folding, as seen above, in particular, is a very common and +very important optimization: so much so that many language implementors +implement constant folding support in their AST representation. + +With LLVM, you don't need this support in the AST. 
Since all calls to +build LLVM IR go through the LLVM IR builder, the builder itself checks +to see if there is a constant folding opportunity when you call it. If +so, it just does the constant fold and returns the constant instead of +creating an instruction. + +Well, that was easy :). In practice, we recommend always using +``IRBuilder`` when generating code like this. It has no "syntactic +overhead" for its use (you don't have to uglify your compiler with +constant checks everywhere) and it can dramatically reduce the amount of +LLVM IR that is generated in some cases (particularly for languages with a +macro preprocessor or that use a lot of constants). + +On the other hand, the ``IRBuilder`` is limited by the fact that it does +all of its analysis inline with the code as it is built. If you take a +slightly more complex example: + +:: + + ready> def test(x) (1+2+x)*(x+(1+2)); + ready> Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double 3.000000e+00, %x + %addtmp1 = fadd double %x, 3.000000e+00 + %multmp = fmul double %addtmp, %addtmp1 + ret double %multmp + } + +In this case, the LHS and RHS of the multiplication are the same value. +We'd really like to see this generate "``tmp = x+3; result = tmp*tmp;``" +instead of computing "``x+3``" twice. + +Unfortunately, no amount of local analysis will be able to detect and +correct this. This requires two transformations: reassociation of +expressions (to make the add's lexically identical) and Common +Subexpression Elimination (CSE) to delete the redundant add instruction. +Fortunately, LLVM provides a broad range of optimizations that you can +use, in the form of "passes". + +LLVM Optimization Passes +======================== + +LLVM provides many optimization passes, which do many different sorts of +things and have different tradeoffs. 
Unlike other systems, LLVM doesn't +hold to the mistaken notion that one set of optimizations is right for +all languages and for all situations. LLVM allows a compiler implementor +to make complete decisions about what optimizations to use, in which +order, and in what situation. + +As a concrete example, LLVM supports both "whole module" passes, which +look across as large a body of code as they can (often a whole file, +but if run at link time, this can be a substantial portion of the whole +program). It also supports and includes "per-function" passes which just +operate on a single function at a time, without looking at other +functions. For more information on passes and how they are run, see the +`How to Write a Pass <../WritingAnLLVMPass.html>`_ document and the +`List of LLVM Passes <../Passes.html>`_. + +For Kaleidoscope, we are currently generating functions on the fly, one +at a time, as the user types them in. We aren't shooting for the +ultimate optimization experience in this setting, but we also want to +catch the easy and quick stuff where possible. As such, we will choose +to run a few per-function optimizations as the user types the function +in. If we wanted to make a "static Kaleidoscope compiler", we would use +exactly the code we have now, except that we would defer running the +optimizer until the entire file has been parsed. + +In order to get per-function optimizations going, we need to set up a +`FunctionPassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold +and organize the LLVM optimizations that we want to run. Once we have +that, we can add a set of optimizations to run. The code looks like +this: + +.. code-block:: c++ + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Provide basic AliasAnalysis support for GVN. 
+ OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + +This code defines a ``FunctionPassManager``, "``OurFPM``". It requires a +pointer to the ``Module`` to construct itself. Once it is set up, we use +a series of "add" calls to add a bunch of LLVM passes. The first pass is +basically boilerplate, it adds a pass so that later optimizations know +how the data structures in the program are laid out. The +"``TheExecutionEngine``" variable is related to the JIT, which we will +get to in the next section. + +In this case, we choose to add 4 optimization passes. The passes we +chose here are a pretty standard set of "cleanup" optimizations that are +useful for a wide variety of code. I won't delve into what they do but, +believe me, they are a good starting place :). + +Once the PassManager is set up, we need to make use of it. We do this by +running it after our newly created function is constructed (in +``FunctionAST::Codegen``), but before it is returned to the client: + +.. code-block:: c++ + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + +As you can see, this is pretty straightforward. The +``FunctionPassManager`` optimizes and updates the LLVM Function\* in +place, improving (hopefully) its body. 
With this in place, we can try +our test above again: + +:: + + ready> def test(x) (1+2+x)*(x+(1+2)); + ready> Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double %x, 3.000000e+00 + %multmp = fmul double %addtmp, %addtmp + ret double %multmp + } + +As expected, we now get our nicely optimized code, saving a floating +point add instruction from every execution of this function. + +LLVM provides a wide variety of optimizations that can be used in +certain circumstances. Some `documentation about the various +passes <../Passes.html>`_ is available, but it isn't very complete. +Another good source of ideas can come from looking at the passes that +``Clang`` runs to get started. The "``opt``" tool allows you to +experiment with passes from the command line, so you can see if they do +anything. + +Now that we have reasonable code coming out of our front-end, let's talk +about executing it! + +Adding a JIT Compiler +===================== + +Code that is available in LLVM IR can have a wide variety of tools +applied to it. For example, you can run optimizations on it (as we did +above), you can dump it out in textual or binary forms, you can compile +the code to an assembly file (.s) for some target, or you can JIT +compile it. The nice thing about the LLVM IR representation is that it +is the "common currency" between many different parts of the compiler. + +In this section, we'll add JIT compiler support to our interpreter. The +basic idea that we want for Kaleidoscope is to have the user enter +function bodies as they do now, but immediately evaluate the top-level +expressions they type in. For example, if they type in "1 + 2;", we +should evaluate and print out 3. If they define a function, they should +be able to call it from the command line. + +In order to do this, we first declare and initialize the JIT. This is +done by adding a global variable and a call in ``main``: + +.. 
code-block:: c++ + + static ExecutionEngine *TheExecutionEngine; + ... + int main() { + .. + // Create the JIT. This takes ownership of the module. + TheExecutionEngine = EngineBuilder(TheModule).create(); + .. + } + +This creates an abstract "Execution Engine" which can be either a JIT +compiler or the LLVM interpreter. LLVM will automatically pick a JIT +compiler for you if one is available for your platform, otherwise it +will fall back to the interpreter. + +Once the ``ExecutionEngine`` is created, the JIT is ready to be used. +There are a variety of APIs that are useful, but the simplest one is the +"``getPointerToFunction(F)``" method. This method JIT compiles the +specified LLVM Function and returns a function pointer to the generated +machine code. In our case, this means that we can change the code that +parses a top-level expression to look like this: + +.. code-block:: c++ + + static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + LF->dump(); // Dump the function for exposition purposes. + + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + +Recall that we compile top-level expressions into a self-contained LLVM +function that takes no arguments and returns the computed double. +Because the LLVM JIT compiler matches the native platform ABI, this +means that you can just cast the result pointer to a function pointer of +that type and call it directly. This means, there is no difference +between JIT compiled code and native machine code that is statically +linked into your application. 
+ +With just these two changes, let's see how Kaleidoscope works now! + +:: + + ready> 4+5; + Read top-level expression: + define double @0() { + entry: + ret double 9.000000e+00 + } + + Evaluated to 9.000000 + +Well this looks like it is basically working. The dump of the function +shows the "no argument function that always returns double" that we +synthesize for each top-level expression that is typed in. This +demonstrates very basic functionality, but can we do more? + +:: + + ready> def testfunc(x y) x + y*2; + Read function definition: + define double @testfunc(double %x, double %y) { + entry: + %multmp = fmul double %y, 2.000000e+00 + %addtmp = fadd double %multmp, %x + ret double %addtmp + } + + ready> testfunc(4, 10); + Read top-level expression: + define double @1() { + entry: + %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01) + ret double %calltmp + } + + Evaluated to 24.000000 + +This illustrates that we can now call user code, but there is something +a bit subtle going on here. Note that we only invoke the JIT on the +anonymous functions that *call testfunc*, but we never invoked it on +*testfunc* itself. What actually happened here is that the JIT scanned +for all non-JIT'd functions transitively called from the anonymous +function and compiled all of them before returning from +``getPointerToFunction()``. + +The JIT provides a number of other more advanced interfaces for things +like freeing allocated machine code, rejit'ing functions to update them, +etc. 
However, even with this simple code, we get some surprisingly +powerful capabilities - check this out (I removed the dump of the +anonymous functions, you should get the idea by now :) : + +:: + + ready> extern sin(x); + Read extern: + declare double @sin(double) + + ready> extern cos(x); + Read extern: + declare double @cos(double) + + ready> sin(1.0); + Read top-level expression: + define double @2() { + entry: + ret double 0x3FEAED548F090CEE + } + + Evaluated to 0.841471 + + ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x); + Read function definition: + define double @foo(double %x) { + entry: + %calltmp = call double @sin(double %x) + %multmp = fmul double %calltmp, %calltmp + %calltmp2 = call double @cos(double %x) + %multmp4 = fmul double %calltmp2, %calltmp2 + %addtmp = fadd double %multmp, %multmp4 + ret double %addtmp + } + + ready> foo(4.0); + Read top-level expression: + define double @3() { + entry: + %calltmp = call double @foo(double 4.000000e+00) + ret double %calltmp + } + + Evaluated to 1.000000 + +Whoa, how does the JIT know about sin and cos? The answer is +surprisingly simple: in this example, the JIT started execution of a +function and got to a function call. It realized that the function was +not yet JIT compiled and invoked the standard set of routines to resolve +the function. In this case, there is no body defined for the function, +so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope +process itself. Since "``sin``" is defined within the JIT's address +space, it simply patches up calls in the module to call the libm version +of ``sin`` directly. + +The LLVM JIT provides a number of interfaces (look in the +``ExecutionEngine.h`` file) for controlling how unknown functions get +resolved. 
It allows you to establish explicit mappings between IR +objects and addresses (useful for LLVM global variables that you want to +map to static tables, for example), allows you to dynamically decide on +the fly based on the function name, and even allows you to have the JIT +compile functions lazily the first time they're called. + +One interesting application of this is that we can now extend the +language by writing arbitrary C++ code to implement operations. For +example, if we add: + +.. code-block:: c++ + + /// putchard - putchar that takes a double and returns 0. + extern "C" + double putchard(double X) { + putchar((char)X); + return 0; + } + +Now we can produce simple output to the console by using things like: +"``extern putchard(x); putchard(120);``", which prints a lowercase 'x' +on the console (120 is the ASCII code for 'x'). Similar code could be +used to implement file I/O, console input, and many other capabilities +in Kaleidoscope. + +This completes the JIT and optimizer chapter of the Kaleidoscope +tutorial. At this point, we can compile a non-Turing-complete +programming language, optimize and JIT compile it in a user-driven way. +Next up we'll look into `extending the language with control flow +constructs <LangImpl5.html>`_, tackling some interesting LLVM IR issues +along the way. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +the LLVM JIT and optimizer. To build this example, use: + +.. code-block:: bash + + # Compile + clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy + # Run + ./toy + +If you are compiling this on Linux, make sure to add the "-rdynamic" +option as well. This makes sure that the external functions are resolved +properly at runtime. + +Here is the code: + +.. 
code-block:: c++ + + #include "llvm/DerivedTypes.h" + #include "llvm/ExecutionEngine/ExecutionEngine.h" + #include "llvm/ExecutionEngine/JIT.h" + #include "llvm/IRBuilder.h" + #include "llvm/LLVMContext.h" + #include "llvm/Module.h" + #include "llvm/PassManager.h" + #include "llvm/Analysis/Verifier.h" + #include "llvm/Analysis/Passes.h" + #include "llvm/DataLayout.h" + #include "llvm/Transforms/Scalar.h" + #include "llvm/Support/TargetSelect.h" + #include + #include + #include + #include + using namespace llvm; + + //===----------------------------------------------------------------------===// + // Lexer + //===----------------------------------------------------------------------===// + + // The lexer returns tokens [0-255] if it is an unknown character, otherwise one + // of these for known things. + enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 + }; + + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number + + /// gettok - Return the next token from standard input. + static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. 
+ do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; + } + + //===----------------------------------------------------------------------===// + // Abstract Syntax Tree (aka Parse Tree) + //===----------------------------------------------------------------------===// + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". + class NumberExprAST : public ExprAST { + double Val; + public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); + }; + + /// VariableExprAST - Expression class for referencing a variable, like "a". + class VariableExprAST : public ExprAST { + std::string Name; + public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); + }; + + /// BinaryExprAST - Expression class for a binary operator. + class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; + public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); + }; + + /// CallExprAST - Expression class for function calls. + class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; + public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); + }; + + /// PrototypeAST - This class represents the "prototype" for a function, + /// which captures its name, and its argument names (thus implicitly the number + /// of arguments the function takes). 
+ class PrototypeAST { + std::string Name; + std::vector Args; + public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + + Function *Codegen(); + }; + + /// FunctionAST - This class represents a function definition itself. + class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; + public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); + }; + + //===----------------------------------------------------------------------===// + // Parser + //===----------------------------------------------------------------------===// + + /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current + /// token the parser is looking at. getNextToken reads another token from the + /// lexer and updates CurTok with its results. + static int CurTok; + static int getNextToken() { + return CurTok = gettok(); + } + + /// BinopPrecedence - This holds the precedence for each binary operator that is + /// defined. + static std::map BinopPrecedence; + + /// GetTokPrecedence - Get the precedence of the pending binary operator token. + static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; + } + + /// Error* - These are little helper functions for error handling. + ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} + PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } + FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + + static ExprAST *ParseExpression(); + + /// identifierexpr + /// ::= identifier + /// ::= identifier '(' expression* ')' + static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. 
+ getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); + } + + /// numberexpr ::= number + static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; + } + + /// parenexpr ::= '(' expression ')' + static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; + } + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } + } + + /// binoprhs + /// ::= ('+' primary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. 
+ int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } + } + + /// expression + /// ::= primary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + + /// prototype + /// ::= id '(' id* ')' + static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); + } + + /// definition ::= 'def' prototype expression + static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; + } + + /// toplevelexpr ::= expression + static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; + } + + /// external ::= 'extern' prototype + static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. 
+ return ParsePrototype(); + } + + //===----------------------------------------------------------------------===// + // Code Generation + //===----------------------------------------------------------------------===// + + static Module *TheModule; + static IRBuilder<> Builder(getGlobalContext()); + static std::map NamedValues; + static FunctionPassManager *TheFPM; + + Value *ErrorV(const char *Str) { Error(Str); return 0; } + + Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); + } + + Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); + } + + Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } + } + + Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); + } + + Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. 
+ std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; + } + + Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. 
+ TheFunction->eraseFromParent(); + return 0; + } + + //===----------------------------------------------------------------------===// + // Top-Level parsing and JIT Driver + //===----------------------------------------------------------------------===// + + static ExecutionEngine *TheExecutionEngine; + + static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read top-level expression:"); + LF->dump(); + + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + /// top ::= definition | external | expression | ';' + static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } + } + + //===----------------------------------------------------------------------===// + // "Library" functions that can be "extern'd" from user code. 
+ //===----------------------------------------------------------------------===// + + /// putchard - putchar that takes a double and returns 0. + extern "C" + double putchard(double X) { + putchar((char)X); + return 0; + } + + //===----------------------------------------------------------------------===// + // Main driver code. + //===----------------------------------------------------------------------===// + + int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. 
+ TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; + } + +`Next: Extending the language: control flow `_ + diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html deleted file mode 100644 index 9a9fd8c14e09..000000000000 --- a/docs/tutorial/LangImpl5.html +++ /dev/null @@ -1,1772 +0,0 @@ - - - - - Kaleidoscope: Extending the Language: Control Flow - - - - - - - -

Kaleidoscope: Extending the Language: Control Flow

- - - -
-

Written by Chris Lattner

-
- - -

Chapter 5 Introduction

- - -
- -

Welcome to Chapter 5 of the "Implementing a language -with LLVM" tutorial. Parts 1-4 described the implementation of the simple -Kaleidoscope language and included support for generating LLVM IR, followed by -optimizations and a JIT compiler. Unfortunately, as presented, Kaleidoscope is -mostly useless: it has no control flow other than call and return. This means -that you can't have conditional branches in the code, significantly limiting its -power. In this episode of "build that compiler", we'll extend Kaleidoscope to -have an if/then/else expression plus a simple 'for' loop.

- -
- - -

If/Then/Else

- - -
- -

-Extending Kaleidoscope to support if/then/else is quite straightforward. It -basically requires adding support for this "new" concept to the lexer, -parser, AST, and LLVM code emitter. This example is nice, because it shows how -easy it is to "grow" a language over time, incrementally extending it as new -ideas are discovered.

- -

Before we get going on "how" we add this extension, let's talk about "what" we -want. The basic idea is that we want to be able to write this sort of thing: -

- -
-
-def fib(x)
-  if x < 3 then
-    1
-  else
-    fib(x-1)+fib(x-2);
-
-
- -

In Kaleidoscope, every construct is an expression: there are no statements. -As such, the if/then/else expression needs to return a value like any other. -Since we're using a mostly functional form, we'll have it evaluate its -conditional, then return the 'then' or 'else' value based on how the condition -was resolved. This is very similar to the C "?:" expression.

- -

The semantics of the if/then/else expression is that it evaluates the -condition to a boolean equality value: 0.0 is considered to be false and -everything else is considered to be true. -If the condition is true, the first subexpression is evaluated and returned, if -the condition is false, the second subexpression is evaluated and returned. -Since Kaleidoscope allows side-effects, this behavior is important to nail down. -

- -

Now that we know what we "want", let's break this down into its constituent -pieces.

- - -

Lexer Extensions for If/Then/Else

- - - -
- -

The lexer extensions are straightforward. First we add new enum values -for the relevant tokens:

- -
-
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-
-
- -

Once we have that, we recognize the new keywords in the lexer. This is pretty simple -stuff:

- -
-
-    ...
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    return tok_identifier;
-
-
- -
- - -

AST Extensions for If/Then/Else

- - -
- -

To represent the new expression we add a new AST node for it:

- -
-
-/// IfExprAST - Expression class for if/then/else; holds the three subexpressions.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;  // condition, 'then' value, 'else' value
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-    : Cond(cond), Then(then), Else(_else) {}  // stores the pointers exactly as given
-  virtual Value *Codegen();  // declared here only; the definition lives outside the class
-};
-
-
- -

The AST node just has pointers to the various subexpressions.

- -
- - -

Parser Extensions for If/Then/Else

- - -
- -

Now that we have the relevant tokens coming from the lexer and we have the -AST node to build, our parsing logic is relatively straightforward. First we -define a new parsing function:

- -
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {  // yields a new IfExprAST; 0 if a subexpression fails to parse
-  getNextToken();  // eat the if.
-  
-  // condition.
-  ExprAST *Cond = ParseExpression();
-  if (!Cond) return 0;  // propagate the failure to the caller
-  
-  if (CurTok != tok_then)
-    return Error("expected then");  // Error() is defined elsewhere; presumably returns null -- TODO confirm
-  getNextToken();  // eat the then
-  
-  ExprAST *Then = ParseExpression();  // value of the 'then' branch
-  if (Then == 0) return 0;
-  
-  if (CurTok != tok_else)
-    return Error("expected else");  // the 'else' branch is mandatory in this grammar
-  
-  getNextToken();  // eat the else
-  
-  ExprAST *Else = ParseExpression();  // value of the 'else' branch
-  if (!Else) return 0;
-  
-  return new IfExprAST(Cond, Then, Else);
-}
-
-
- -

Next we hook it up as a primary expression:

- -
-
-static ExprAST *ParsePrimary() {  // dispatches on the current token (CurTok)
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");  // any token not listed below
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();  // if/then/else is parsed as a primary expression
-  }
-}
-
-
- -
- - -

LLVM IR for If/Then/Else

- - -
- -

Now that we have it parsing and building the AST, the final piece is adding -LLVM code generation support. This is the most interesting part of the -if/then/else example, because this is where it starts to introduce new concepts. -All of the code above has been thoroughly described in previous chapters. -

- -

To motivate the code we want to produce, let's take a look at a simple -example. Consider:

- -
-
-extern foo();
-extern bar();
-def baz(x) if x then foo() else bar();
-
-
- -

If you disable optimizations, the code you'll (soon) get from Kaleidoscope -looks like this:

- -
-
-declare double @foo()
-
-declare double @bar()
-
-define double @baz(double %x) {
-entry:
-  %ifcond = fcmp one double %x, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:		; preds = %entry
-  %calltmp = call double @foo()
-  br label %ifcont
-
-else:		; preds = %entry
-  %calltmp1 = call double @bar()
-  br label %ifcont
-
-ifcont:		; preds = %else, %then
-  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
-  ret double %iftmp
-}
-
-
- -

To visualize the control flow graph, you can use a nifty feature of the LLVM -'opt' tool. If you put this LLVM IR -into "t.ll" and run "llvm-as < t.ll | opt -analyze -view-cfg", a window will pop up and you'll -see this graph:

- -
Example CFG
- -

Another way to get this is to call "F->viewCFG()" or -"F->viewCFGOnly()" (where F is a "Function*") either by -inserting actual calls into the code and recompiling or by calling these in the -debugger. LLVM has many nice features for visualizing various graphs.

- -

Getting back to the generated code, it is fairly simple: the entry block -evaluates the conditional expression ("x" in our case here) and compares the -result to 0.0 with the "fcmp one" -instruction ('one' is "Ordered and Not Equal"). Based on the result of this -expression, the code jumps to either the "then" or "else" blocks, which contain -the expressions for the true/false cases.

- -

Once the then/else blocks are finished executing, they both branch back to the -'ifcont' block to execute the code that happens after the if/then/else. In this -case the only thing left to do is to return to the caller of the function. The -question then becomes: how does the code know which expression to return?

- -

The answer to this question involves an important SSA operation: the -Phi -operation. If you're not familiar with SSA, the wikipedia -article is a good introduction and there are various other introductions to -it available on your favorite search engine. The short version is that -"execution" of the Phi operation requires "remembering" which block control came -from. The Phi operation takes on the value corresponding to the input control -block. In this case, if control comes in from the "then" block, it gets the -value of "calltmp". If control comes from the "else" block, it gets the value -of "calltmp1".

- -

At this point, you are probably starting to think "Oh no! This means my -simple and elegant front-end will have to start generating SSA form in order to -use LLVM!". Fortunately, this is not the case, and we strongly advise -not implementing an SSA construction algorithm in your front-end -unless there is an amazingly good reason to do so. In practice, there are two -sorts of values that float around in code written for your average imperative -programming language that might need Phi nodes:

- -
    -
  1. Code that involves user variables: x = 1; x = x + 1;
  2. -
  3. Values that are implicit in the structure of your AST, such as the Phi node -in this case.
  4. -
- -

In Chapter 7 of this tutorial ("mutable -variables"), we'll talk about #1 -in depth. For now, just believe me that you don't need SSA construction to -handle this case. For #2, you have the choice of using the techniques that we will -describe for #1, or you can insert Phi nodes directly, if convenient. In this -case, it is really really easy to generate the Phi node, so we choose to do it -directly.

- -

Okay, enough of the motivation and overview, let's generate code!

- -
- - -

Code Generation for If/Then/Else

- - -
- -

In order to generate code for this, we implement the Codegen method -for IfExprAST:

- -
-
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (CondV == 0) return 0;  // condition failed to codegen; propagate the failure
-  
-  // Convert condition to a bool by comparing non-equal to 0.0 (fcmp one).
-  CondV = Builder.CreateFCmpONE(CondV, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                "ifcond");
-
-
- -

This code is straightforward and similar to what we saw before. We emit the -expression for the condition, then compare that value to zero to get a truth -value as a 1-bit (bool) value.

- -
-
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-  
-  // Create blocks for the then and else cases.  Insert the 'then' block at the
-  // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-
-
- -

This code creates the basic blocks that are related to the if/then/else -statement, and correspond directly to the blocks in the example above. The -first line gets the current Function object that is being built. It -gets this by asking the builder for the current BasicBlock, and asking that -block for its "parent" (the function it is currently embedded into).

- -

Once it has that, it creates three blocks. Note that it passes "TheFunction" -into the constructor for the "then" block. This causes the constructor to -automatically insert the new block into the end of the specified function. The -other two blocks are created, but aren't yet inserted into the function.

- -

Once the blocks are created, we can emit the conditional branch that chooses -between them. Note that creating new blocks does not implicitly affect the -IRBuilder, so it is still inserting into the block that the condition -went into. Also note that it is creating a branch to the "then" block and the -"else" block, even though the "else" block isn't inserted into the function yet. -This is all ok: it is the standard way that LLVM supports forward -references.

- -
-
-  // Emit then value.
-  Builder.SetInsertPoint(ThenBB);
-  
-  Value *ThenV = Then->Codegen();
-  if (ThenV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
-  ThenBB = Builder.GetInsertBlock();
-
-
- -

After the conditional branch is inserted, we move the builder to start -inserting into the "then" block. Strictly speaking, this call moves the -insertion point to be at the end of the specified block. However, since the -"then" block is empty, it also starts out by inserting at the beginning of the -block. :)

- -

Once the insertion point is set, we recursively codegen the "then" expression -from the AST. To finish off the "then" block, we create an unconditional branch -to the merge block. One interesting (and very important) aspect of the LLVM IR -is that it requires all basic blocks -to be "terminated" with a control flow -instruction such as return or branch. This means that all control flow, -including fall throughs must be made explicit in the LLVM IR. If you -violate this rule, the verifier will emit an error.

- -

The final line here is quite subtle, but is very important. The basic issue -is that when we create the Phi node in the merge block, we need to set up the -block/value pairs that indicate how the Phi will work. Importantly, the Phi -node expects to have an entry for each predecessor of the block in the CFG. Why -then, are we getting the current block when we just set it to ThenBB 5 lines -above? The problem is that the "Then" expression may actually itself change the -block that the Builder is emitting into if, for example, it contains a nested -"if/then/else" expression. Because calling Codegen recursively could -arbitrarily change the notion of the current block, we are required to get an -up-to-date value for code that will set up the Phi node.

- -
-
-  // Emit else block.
-  TheFunction->getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  
-  Value *ElseV = Else->Codegen();
-  if (ElseV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
-  ElseBB = Builder.GetInsertBlock();
-
-
- -

Code generation for the 'else' block is basically identical to codegen for -the 'then' block. The only significant difference is the first line, which adds -the 'else' block to the function. Recall previously that the 'else' block was -created, but not added to the function. Now that the 'then' and 'else' blocks -are emitted, we can finish up with the merge code:

- -
-
-  // Emit merge block.
-  TheFunction->getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                  "iftmp");
-  
-  PN->addIncoming(ThenV, ThenBB);
-  PN->addIncoming(ElseV, ElseBB);
-  return PN;
-}
-
-
- -

The first two lines here are now familiar: the first adds the "merge" block -to the Function object (it was previously floating, like the else block above). -The second line changes the insertion point so that newly created code will go -into the "merge" block. Once that is done, we need to create the PHI node and -set up the block/value pairs for the PHI.

- -

Finally, the CodeGen function returns the phi node as the value computed by -the if/then/else expression. In our example above, this returned value will -feed into the code for the top-level function, which will create the return -instruction.

- -

Overall, we now have the ability to execute conditional code in -Kaleidoscope. With this extension, Kaleidoscope is a fairly complete language -that can calculate a wide variety of numeric functions. Next up we'll add -another useful expression that is familiar from non-functional languages...

- -
- -
- - -

'for' Loop Expression

- - -
- -

Now that we know how to add basic control flow constructs to the language, -we have the tools to add more powerful things. Let's add something more -aggressive, a 'for' expression:

- -
-
- extern putchard(char)
- def printstar(n)
-   for i = 1, i < n, 1.0 in
-     putchard(42);  # ascii 42 = '*'
-     
- # print 100 '*' characters
- printstar(100);
-
-
- -

This expression defines a new variable ("i" in this case) which iterates from -a starting value, while the condition ("i < n" in this case) is true, -incrementing by an optional step value ("1.0" in this case). If the step value -is omitted, it defaults to 1.0. While the condition is true, it executes its -body expression. Because we don't have anything better to return, we'll just -define the loop as always returning 0.0. In the future when we have mutable -variables, it will get more useful.

- -

As before, let's talk about the changes that we need to Kaleidoscope to -support this.

- - -

Lexer Extensions for the 'for' Loop

- - -
- -

The lexer extensions are the same sort of thing as for if/then/else:

- -
-
-  ... in enum Token ...
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-  tok_for = -9, tok_in = -10
-
-  ... in gettok ...
-  if (IdentifierStr == "def") return tok_def;
-  if (IdentifierStr == "extern") return tok_extern;
-  if (IdentifierStr == "if") return tok_if;
-  if (IdentifierStr == "then") return tok_then;
-  if (IdentifierStr == "else") return tok_else;
-  if (IdentifierStr == "for") return tok_for;
-  if (IdentifierStr == "in") return tok_in;
-  return tok_identifier;
-
-
- -
- - -

AST Extensions for the 'for' Loop

- - -
- -

The AST node is just as simple. It basically boils down to capturing -the variable name and the constituent expressions in the node.

- -
-
-/// ForExprAST - Expression class for for/in; holds the loop variable name and its subexpressions.
-class ForExprAST : public ExprAST {
-  std::string VarName;  // name of the loop variable
-  ExprAST *Start, *End, *Step, *Body;  // Step is null when the parser saw no step expression
-public:
-  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}  // copies the name, stores the pointers as given
-  virtual Value *Codegen();  // declared here only; the definition lives outside the class
-};
-
-
- -
- - -

Parser Extensions for the 'for' Loop

- - -
- -

The parser code is also fairly standard. The only interesting thing here is -handling of the optional step value. The parser code handles it by checking to -see if the second comma is present. If not, it sets the step value to null in -the AST node:

- -
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {  // yields a new ForExprAST; 0 if a subexpression fails to parse
-  getNextToken();  // eat the for.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");  // Error() is defined elsewhere; presumably returns null -- TODO confirm
-  
-  std::string IdName = IdentifierStr;  // copy now: the next getNextToken() may overwrite IdentifierStr
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // eat '='.
-  
-  
-  ExprAST *Start = ParseExpression();  // initial value of the loop variable
-  if (Start == 0) return 0;
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();  // eat ','.
-  
-  ExprAST *End = ParseExpression();  // loop condition expression
-  if (End == 0) return 0;
-  
-  // The step value is optional.
-  ExprAST *Step = 0;  // stays null when no second ',' follows the end expression
-  if (CurTok == ',') {
-    getNextToken();  // eat ','.
-    Step = ParseExpression();
-    if (Step == 0) return 0;
-  }
-  
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();  // loop body expression
-  if (Body == 0) return 0;
-
-  return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-
- -
- - -

LLVM IR for the 'for' Loop

- - -
- -

Now we get to the good part: the LLVM IR we want to generate for this thing. -With the simple example above, we get this LLVM IR (note that this dump is -generated with optimizations disabled for clarity): -

- -
-
-declare double @putchard(double)
-
-define double @printstar(double %n) {
-entry:
-  ; initial value = 1.0 (inlined into phi)
-  br label %loop
-
-loop:		; preds = %loop, %entry
-  %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
-  ; body
-  %calltmp = call double @putchard(double 4.200000e+01)
-  ; increment
-  %nextvar = fadd double %i, 1.000000e+00
-
-  ; termination test
-  %cmptmp = fcmp ult double %i, %n
-  %booltmp = uitofp i1 %cmptmp to double
-  %loopcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %loopcond, label %loop, label %afterloop
-
-afterloop:		; preds = %loop
-  ; loop always returns 0.0
-  ret double 0.000000e+00
-}
-
-
- -

This loop contains all the same constructs we saw before: a phi node, several -expressions, and some basic blocks. Let's see how this fits together.

- -
- - -

Code Generation for the 'for' Loop

- - -
- -

The first part of Codegen is very simple: we just output the start expression -for the loop value:

- -
-
-Value *ForExprAST::Codegen() {
-  // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0) return 0;
-
-
- -

With this out of the way, the next step is to set up the LLVM basic block -for the start of the loop body. In the case above, the whole loop body is one -block, but remember that the body code itself could consist of multiple blocks -(e.g. if it contains an if/then/else or a for/in expression).

- -
-
-  // Make the new basic block for the loop header, inserting after current
-  // block.
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-  
-  // Insert an explicit fall through from the current block to the LoopBB.
-  Builder.CreateBr(LoopBB);
-
-
- -

This code is similar to what we saw for if/then/else. Because we will need -it to create the Phi node, we remember the block that falls through into the -loop. Once we have that, we create the actual block that starts the loop and -create an unconditional branch for the fall-through between the two blocks.

- -
-
-  // Start insertion in LoopBB.
-  Builder.SetInsertPoint(LoopBB);
-  
-  // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
-  Variable->addIncoming(StartVal, PreheaderBB);
-
-
- -

Now that the "preheader" for the loop is set up, we switch to emitting code -for the loop body. To begin with, we move the insertion point and create the -PHI node for the loop induction variable. Since we already know the incoming -value for the starting value, we add it to the Phi node. Note that the Phi will -eventually get a second value for the backedge, but we can't set it up yet -(because it doesn't exist!).

- -
-
-  // Within the loop, the variable is defined equal to the PHI node.  If it
-  // shadows an existing variable, we have to restore it, so save it now.
-  Value *OldVal = NamedValues[VarName];
-  NamedValues[VarName] = Variable;
-  
-  // Emit the body of the loop.  This, like any other expr, can change the
-  // current BB.  Note that we ignore the value computed by the body, but don't
-  // allow an error.
-  if (Body->Codegen() == 0)
-    return 0;
-
-
- -

Now the code starts to get more interesting. Our 'for' loop introduces a new -variable to the symbol table. This means that our symbol table can now contain -either function arguments or loop variables. To handle this, before we codegen -the body of the loop, we add the loop variable as the current value for its -name. Note that it is possible that there is a variable of the same name in the -outer scope. It would be easy to make this an error (emit an error and return -null if there is already an entry for VarName) but we choose to allow shadowing -of variables. In order to handle this correctly, we remember the Value that -we are potentially shadowing in OldVal (which will be null if there is -no shadowed variable).

- -

Once the loop variable is set into the symbol table, the code recursively -codegen's the body. This allows the body to use the loop variable: any -references to it will naturally find it in the symbol table.

- -
-
-  // Emit the step value.
-  Value *StepVal;
-  if (Step) {
-    StepVal = Step->Codegen();
-    if (StepVal == 0) return 0;
-  } else {
-    // If not specified, use 1.0.
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  }
-  
-  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-
-
- -

Now that the body is emitted, we compute the next value of the iteration -variable by adding the step value, or 1.0 if it isn't present. 'NextVar' -will be the value of the loop variable on the next iteration of the loop.

- -
-
-  // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0) return EndCond;  // EndCond is null here, so this returns null
-  
-  // Convert condition to a bool by comparing non-equal to 0.0 (fcmp one).
-  EndCond = Builder.CreateFCmpONE(EndCond, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                  "loopcond");
-
-
- -

Finally, we evaluate the exit value of the loop, to determine whether the -loop should exit. This mirrors the condition evaluation for the if/then/else -statement.

- -
-
-  // Create the "after loop" block and insert it.
-  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-  
-  // Insert the conditional branch into the end of LoopEndBB.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-  
-  // Any new code will be inserted in AfterBB.
-  Builder.SetInsertPoint(AfterBB);
-
-
- -

With the code for the body of the loop complete, we just need to finish up -the control flow for it. This code remembers the end block (for the phi node), -then creates the block for the loop exit ("afterloop"). Based on the value of -the exit condition, it creates a conditional branch that chooses between -executing the loop again and exiting the loop. Any future code is emitted in -the "afterloop" block, so it sets the insertion position to it.

- -
-
-  // Add a new entry to the PHI node for the backedge.
-  Variable->addIncoming(NextVar, LoopEndBB);
-  
-  // Restore the unshadowed variable.
-  if (OldVal)
-    NamedValues[VarName] = OldVal;
-  else
-    NamedValues.erase(VarName);
-  
-  // for expr always returns 0.0.
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-
- -

The final code handles various cleanups: now that we have the "NextVar" -value, we can add the incoming value to the loop PHI node. After that, we -remove the loop variable from the symbol table, so that it isn't in scope after -the for loop. Finally, code generation of the for loop always returns 0.0, so -that is what we return from ForExprAST::Codegen.

- -

With this, we conclude the "adding control flow to Kaleidoscope" chapter of -the tutorial. In this chapter we added two control flow constructs, and used them to motivate a couple of aspects of the LLVM IR that are important for front-end implementors -to know. In the next chapter of our saga, we will get a bit crazier and add -user-defined operators to our poor innocent -language.

- -
- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with the -if/then/else and for expressions. To build this example, use: -

- -
-
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-
-
- -

Here is the code:

- -
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,  // end of input
-
-  // commands
-  tok_def = -2, tok_extern = -3,  // keywords "def" and "extern"
-
-  // primary
-  tok_identifier = -4, tok_number = -5,  // text is left in IdentifierStr / value in NumVal
-  
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,  // keywords "if", "then", "else"
-  tok_for = -9, tok_in = -10  // keywords "for", "in"
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';  // one character of lookahead, kept across calls
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;  // keywords are matched first; anything else is an identifier
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+ (so a run such as "1.2.3" is also consumed)
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);  // end pointer is 0, so unparsed trailing text is not detected
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();  // lex whatever follows the comment
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();  // advance the lookahead past the returned character
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}  // virtual so deleting through an ExprAST* runs the derived destructor
-  virtual Value *Codegen() = 0;  // pure virtual: each concrete node supplies its own code generation
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;  // the literal's value
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();  // declared here only; the definition lives outside the class
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector<ExprAST*> Args;
-public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-  : Cond(cond), Then(then), Else(_else) {}
-  virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
-  std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
-public:
-  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
-    : Name(name), Args(args) {}
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map<char, int> BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec <= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector<ExprAST*> Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
-  getNextToken();  // eat the if.
-  
-  // condition.
-  ExprAST *Cond = ParseExpression();
-  if (!Cond) return 0;
-  
-  if (CurTok != tok_then)
-    return Error("expected then");
-  getNextToken();  // eat the then
-  
-  ExprAST *Then = ParseExpression();
-  if (Then == 0) return 0;
-  
-  if (CurTok != tok_else)
-    return Error("expected else");
-  
-  getNextToken();
-  
-  ExprAST *Else = ParseExpression();
-  if (!Else) return 0;
-  
-  return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
-  getNextToken();  // eat the for.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");
-  
-  std::string IdName = IdentifierStr;
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // eat '='.
-  
-  
-  ExprAST *Start = ParseExpression();
-  if (Start == 0) return 0;
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();
-  
-  ExprAST *End = ParseExpression();
-  if (End == 0) return 0;
-  
-  // The step value is optional.
-  ExprAST *Step = 0;
-  if (CurTok == ',') {
-    getNextToken();
-    Step = ParseExpression();
-    if (Step == 0) return 0;
-  }
-  
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-
-  return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();
-  case tok_for:        return ParseForExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec < ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder<> Builder(getGlobalContext());
-static std::map<std::string, Value*> NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '<':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF->arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector<Value*> ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (CondV == 0) return 0;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  CondV = Builder.CreateFCmpONE(CondV, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                "ifcond");
-  
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-  
-  // Create blocks for the then and else cases.  Insert the 'then' block at the
-  // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-  
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-  
-  // Emit then value.
-  Builder.SetInsertPoint(ThenBB);
-  
-  Value *ThenV = Then->Codegen();
-  if (ThenV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
-  ThenBB = Builder.GetInsertBlock();
-  
-  // Emit else block.
-  TheFunction->getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  
-  Value *ElseV = Else->Codegen();
-  if (ElseV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
-  ElseBB = Builder.GetInsertBlock();
-  
-  // Emit merge block.
-  TheFunction->getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                  "iftmp");
-  
-  PN->addIncoming(ThenV, ThenBB);
-  PN->addIncoming(ElseV, ElseBB);
-  return PN;
-}
-
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   ...
-  //   start = startexpr
-  //   goto loop
-  // loop: 
-  //   variable = phi [start, loopheader], [nextvariable, loopend]
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   nextvariable = variable + step
-  //   endcond = endexpr
-  //   br endcond, loop, endloop
-  // outloop:
-  
-  // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0) return 0;
-  
-  // Make the new basic block for the loop header, inserting after current
-  // block.
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-  
-  // Insert an explicit fall through from the current block to the LoopBB.
-  Builder.CreateBr(LoopBB);
-
-  // Start insertion in LoopBB.
-  Builder.SetInsertPoint(LoopBB);
-  
-  // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
-  Variable->addIncoming(StartVal, PreheaderBB);
-  
-  // Within the loop, the variable is defined equal to the PHI node.  If it
-  // shadows an existing variable, we have to restore it, so save it now.
-  Value *OldVal = NamedValues[VarName];
-  NamedValues[VarName] = Variable;
-  
-  // Emit the body of the loop.  This, like any other expr, can change the
-  // current BB.  Note that we ignore the value computed by the body, but don't
-  // allow an error.
-  if (Body->Codegen() == 0)
-    return 0;
-  
-  // Emit the step value.
-  Value *StepVal;
-  if (Step) {
-    StepVal = Step->Codegen();
-    if (StepVal == 0) return 0;
-  } else {
-    // If not specified, use 1.0.
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  }
-  
-  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-
-  // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  EndCond = Builder.CreateFCmpONE(EndCond, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                  "loopcond");
-  
-  // Create the "after loop" block and insert it.
-  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-  
-  // Insert the conditional branch into the end of LoopEndBB.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-  
-  // Any new code will be inserted in AfterBB.
-  Builder.SetInsertPoint(AfterBB);
-  
-  // Add a new entry to the PHI node for the backedge.
-  Variable->addIncoming(NextVar, LoopEndBB);
-  
-  // Restore the unshadowed variable.
-  if (OldVal)
-    NamedValues[VarName] = OldVal;
-  else
-    NamedValues.erase(VarName);
-
-  
-  // for expr always returns 0.0.
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector<Type*> Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI->setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body->Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM->run(*TheFunction);
-    
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction->eraseFromParent();
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready> ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['<'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready> ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
-  return 0;
-}
-
-
- -Next: Extending the language: user-defined operators -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/LangImpl5.rst b/docs/tutorial/LangImpl5.rst new file mode 100644 index 000000000000..80d5f37bc4cd --- /dev/null +++ b/docs/tutorial/LangImpl5.rst @@ -0,0 +1,1607 @@ +================================================== +Kaleidoscope: Extending the Language: Control Flow +================================================== + +.. contents:: + :local: + +Chapter 5 Introduction +====================== + +Welcome to Chapter 5 of the "`Implementing a language with +LLVM <index.html>`_" tutorial. Parts 1-4 described the implementation of +the simple Kaleidoscope language and included support for generating +LLVM IR, followed by optimizations and a JIT compiler. Unfortunately, as +presented, Kaleidoscope is mostly useless: it has no control flow other +than call and return. This means that you can't have conditional +branches in the code, significantly limiting its power. In this episode +of "build that compiler", we'll extend Kaleidoscope to have an +if/then/else expression plus a simple 'for' loop. + +If/Then/Else +============ + +Extending Kaleidoscope to support if/then/else is quite straightforward. +It basically requires adding support for this "new" concept to the +lexer, parser, AST, and LLVM code emitter. This example is nice, because +it shows how easy it is to "grow" a language over time, incrementally +extending it as new ideas are discovered. + +Before we get going on "how" we add this extension, let's talk about +"what" we want. The basic idea is that we want to be able to write this +sort of thing: + +:: + + def fib(x) + if x < 3 then + 1 + else + fib(x-1)+fib(x-2); + +In Kaleidoscope, every construct is an expression: there are no +statements. As such, the if/then/else expression needs to return a value +like any other. Since we're using a mostly functional form, we'll have +it evaluate its conditional, then return the 'then' or 'else' value +based on how the condition was resolved. This is very similar to the C +"?:" expression. 
+ +The semantics of the if/then/else expression is that it evaluates the +condition to a boolean value: 0.0 is considered to be false and +everything else is considered to be true. If the condition is true, the +first subexpression is evaluated and returned; if the condition is +false, the second subexpression is evaluated and returned. Since +Kaleidoscope allows side-effects, this behavior is important to nail +down. + +Now that we know what we "want", let's break this down into its +constituent pieces. + +Lexer Extensions for If/Then/Else +--------------------------------- + +The lexer extensions are straightforward. First we add new enum values +for the relevant tokens: + +.. code-block:: c++ + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + +Once we have that, we recognize the new keywords in the lexer. This is +pretty simple stuff: + +.. code-block:: c++ + + ... + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + return tok_identifier; + +AST Extensions for If/Then/Else +------------------------------- + +To represent the new expression we add a new AST node for it: + +.. code-block:: c++ + + /// IfExprAST - Expression class for if/then/else. + class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; + public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); + }; + +The AST node just has pointers to the various subexpressions. + +Parser Extensions for If/Then/Else +---------------------------------- + +Now that we have the relevant tokens coming from the lexer and we have +the AST node to build, our parsing logic is relatively straightforward. +First we define a new parsing function: + +.. 
code-block:: c++ + + /// ifexpr ::= 'if' expression 'then' expression 'else' expression + static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); + } + +Next we hook it up as a primary expression: + +.. code-block:: c++ + + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + } + } + +LLVM IR for If/Then/Else +------------------------ + +Now that we have it parsing and building the AST, the final piece is +adding LLVM code generation support. This is the most interesting part +of the if/then/else example, because this is where it starts to +introduce new concepts. All of the code above has been thoroughly +described in previous chapters. + +To motivate the code we want to produce, let's take a look at a simple +example. Consider: + +:: + + extern foo(); + extern bar(); + def baz(x) if x then foo() else bar(); + +If you disable optimizations, the code you'll (soon) get from +Kaleidoscope looks like this: + +.. 
code-block:: llvm + + declare double @foo() + + declare double @bar() + + define double @baz(double %x) { + entry: + %ifcond = fcmp one double %x, 0.000000e+00 + br i1 %ifcond, label %then, label %else + + then: ; preds = %entry + %calltmp = call double @foo() + br label %ifcont + + else: ; preds = %entry + %calltmp1 = call double @bar() + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ] + ret double %iftmp + } + +To visualize the control flow graph, you can use a nifty feature of the +LLVM '`opt <http://llvm.org/cmds/opt.html>`_' tool. If you put this LLVM +IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a +window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll +see this graph: + +.. figure:: LangImpl5-cfg.png + :align: center + :alt: Example CFG + + Example CFG + +Another way to get this is to call "``F->viewCFG()``" or +"``F->viewCFGOnly()``" (where F is a "``Function*``") either by +inserting actual calls into the code and recompiling or by calling these +in the debugger. LLVM has many nice features for visualizing various +graphs. + +Getting back to the generated code, it is fairly simple: the entry block +evaluates the conditional expression ("x" in our case here) and compares +the result to 0.0 with the "``fcmp one``" instruction ('one' is "Ordered +and Not Equal"). Based on the result of this expression, the code jumps +to either the "then" or "else" blocks, which contain the expressions for +the true/false cases. + +Once the then/else blocks are finished executing, they both branch back +to the 'ifcont' block to execute the code that happens after the +if/then/else. In this case the only thing left to do is to return to the +caller of the function. The question then becomes: how does the code +know which expression to return? + +The answer to this question involves an important SSA operation: the +`Phi +operation <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_. 
+If you're not familiar with SSA, `the Wikipedia +article <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_ +is a good introduction and there are various other introductions to it +available on your favorite search engine. The short version is that +"execution" of the Phi operation requires "remembering" which block +control came from. The Phi operation takes on the value corresponding to +the input control block. In this case, if control comes in from the +"then" block, it gets the value of "calltmp". If control comes from the +"else" block, it gets the value of "calltmp1". + +At this point, you are probably starting to think "Oh no! This means my +simple and elegant front-end will have to start generating SSA form in +order to use LLVM!". Fortunately, this is not the case, and we strongly +advise *not* implementing an SSA construction algorithm in your +front-end unless there is an amazingly good reason to do so. In +practice, there are two sorts of values that float around in code +written for your average imperative programming language that might need +Phi nodes: + +#. Code that involves user variables: ``x = 1; x = x + 1;`` +#. Values that are implicit in the structure of your AST, such as the + Phi node in this case. + +In `Chapter 7 <LangImpl7.html>`_ of this tutorial ("mutable variables"), +we'll talk about #1 in depth. For now, just believe me that you don't +need SSA construction to handle this case. For #2, you have the choice +of using the techniques that we will describe for #1, or you can insert +Phi nodes directly, if convenient. In this case, it is really really +easy to generate the Phi node, so we choose to do it directly. + +Okay, enough of the motivation and overview, let's generate code! + +Code Generation for If/Then/Else +-------------------------------- + +In order to generate code for this, we implement the ``Codegen`` method +for ``IfExprAST``: + +.. 
code-block:: c++ + + Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing non-equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + +This code is straightforward and similar to what we saw before. We emit +the expression for the condition, then compare that value to zero to get +a truth value as a 1-bit (bool) value. + +.. code-block:: c++ + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + +This code creates the basic blocks that are related to the if/then/else +expression, and correspond directly to the blocks in the example above. +The first line gets the current Function object that is being built. It +gets this by asking the builder for the current BasicBlock, and asking +that block for its "parent" (the function it is currently embedded +into). + +Once it has that, it creates three blocks. Note that it passes +"TheFunction" into the constructor for the "then" block. This causes the +constructor to automatically insert the new block into the end of the +specified function. The other two blocks are created, but aren't yet +inserted into the function. + +Once the blocks are created, we can emit the conditional branch that +chooses between them. Note that creating new blocks does not implicitly +affect the IRBuilder, so it is still inserting into the block that the +condition went into. 
Also note that it is creating a branch to the +"then" block and the "else" block, even though the "else" block isn't +inserted into the function yet. This is all ok: it is the standard way +that LLVM supports forward references. + +.. code-block:: c++ + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + +After the conditional branch is inserted, we move the builder to start +inserting into the "then" block. Strictly speaking, this call moves the +insertion point to be at the end of the specified block. However, since +the "then" block is empty, it also starts out by inserting at the +beginning of the block. :) + +Once the insertion point is set, we recursively codegen the "then" +expression from the AST. To finish off the "then" block, we create an +unconditional branch to the merge block. One interesting (and very +important) aspect of the LLVM IR is that it `requires all basic blocks +to be "terminated" <../LangRef.html#functionstructure>`_ with a `control +flow instruction <../LangRef.html#terminators>`_ such as return or +branch. This means that all control flow, *including fall throughs* must +be made explicit in the LLVM IR. If you violate this rule, the verifier +will emit an error. + +The final line here is quite subtle, but is very important. The basic +issue is that when we create the Phi node in the merge block, we need to +set up the block/value pairs that indicate how the Phi will work. +Importantly, the Phi node expects to have an entry for each predecessor +of the block in the CFG. Why then, are we getting the current block when +we just set it to ThenBB 5 lines above? 
The problem is that the "Then" +expression may actually itself change the block that the Builder is +emitting into if, for example, it contains a nested "if/then/else" +expression. Because calling Codegen recursively could arbitrarily change +the notion of the current block, we are required to get an up-to-date +value for code that will set up the Phi node. + +.. code-block:: c++ + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + +Code generation for the 'else' block is basically identical to codegen +for the 'then' block. The only significant difference is the first line, +which adds the 'else' block to the function. Recall previously that the +'else' block was created, but not added to the function. Now that the +'then' and 'else' blocks are emitted, we can finish up with the merge +code: + +.. code-block:: c++ + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; + } + +The first two lines here are now familiar: the first adds the "merge" +block to the Function object (it was previously floating, like the else +block above). The second line changes the insertion point so that newly +created code will go into the "merge" block. Once that is done, we need +to create the PHI node and set up the block/value pairs for the PHI. + +Finally, the CodeGen function returns the phi node as the value computed +by the if/then/else expression. In our example above, this returned +value will feed into the code for the top-level function, which will +create the return instruction.
+ +Overall, we now have the ability to execute conditional code in +Kaleidoscope. With this extension, Kaleidoscope is a fairly complete +language that can calculate a wide variety of numeric functions. Next up +we'll add another useful expression that is familiar from non-functional +languages... + +'for' Loop Expression +===================== + +Now that we know how to add basic control flow constructs to the +language, we have the tools to add more powerful things. Let's add +something more aggressive, a 'for' expression: + +:: + + extern putchard(char) + def printstar(n) + for i = 1, i < n, 1.0 in + putchard(42); # ascii 42 = '*' + + # print 100 '*' characters + printstar(100); + +This expression defines a new variable ("i" in this case) which iterates +from a starting value, while the condition ("i < n" in this case) is +true, incrementing by an optional step value ("1.0" in this case). If +the step value is omitted, it defaults to 1.0. While the condition is true, +it executes its body expression. Because we don't have anything better +to return, we'll just define the loop as always returning 0.0. In the +future when we have mutable variables, it will get more useful. + +As before, let's talk about the changes that we need to Kaleidoscope to +support this. + +Lexer Extensions for the 'for' Loop +----------------------------------- + +The lexer extensions are the same sort of thing as for if/then/else: + +.. code-block:: c++ + + ... in enum Token ... + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10 + + ... in gettok ...
+ if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + return tok_identifier; + +AST Extensions for the 'for' Loop +--------------------------------- + +The AST node is just as simple. It basically boils down to capturing the +variable name and the constituent expressions in the node. + +.. code-block:: c++ + + /// ForExprAST - Expression class for for/in. + class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; + public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); + }; + +Parser Extensions for the 'for' Loop +------------------------------------ + +The parser code is also fairly standard. The only interesting thing here +is handling of the optional step value. The parser code handles it by +checking to see if the second comma is present. If not, it sets the step +value to null in the AST node: + +.. code-block:: c++ + + /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression + static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. 
+ ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); + } + +LLVM IR for the 'for' Loop +-------------------------- + +Now we get to the good part: the LLVM IR we want to generate for this +thing. With the simple example above, we get this LLVM IR (note that +this dump is generated with optimizations disabled for clarity): + +.. code-block:: llvm + + declare double @putchard(double) + + define double @printstar(double %n) { + entry: + ; initial value = 1.0 (inlined into phi) + br label %loop + + loop: ; preds = %loop, %entry + %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ] + ; body + %calltmp = call double @putchard(double 4.200000e+01) + ; increment + %nextvar = fadd double %i, 1.000000e+00 + + ; termination test + %cmptmp = fcmp ult double %i, %n + %booltmp = uitofp i1 %cmptmp to double + %loopcond = fcmp one double %booltmp, 0.000000e+00 + br i1 %loopcond, label %loop, label %afterloop + + afterloop: ; preds = %loop + ; loop always returns 0.0 + ret double 0.000000e+00 + } + +This loop contains all the same constructs we saw before: a phi node, +several expressions, and some basic blocks. Let's see how this fits +together. + +Code Generation for the 'for' Loop +---------------------------------- + +The first part of Codegen is very simple: we just output the start +expression for the loop value: + +.. code-block:: c++ + + Value *ForExprAST::Codegen() { + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + +With this out of the way, the next step is to set up the LLVM basic +block for the start of the loop body.
In the case above, the whole loop +body is one block, but remember that the body code itself could consist +of multiple blocks (e.g. if it contains an if/then/else or a for/in +expression). + +.. code-block:: c++ + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + +This code is similar to what we saw for if/then/else. Because we will +need it to create the Phi node, we remember the block that falls through +into the loop. Once we have that, we create the actual block that starts +the loop and create an unconditional branch for the fall-through between +the two blocks. + +.. code-block:: c++ + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + +Now that the "preheader" for the loop is set up, we switch to emitting +code for the loop body. To begin with, we move the insertion point and +create the PHI node for the loop induction variable. Since we already +know the incoming value for the starting value, we add it to the Phi +node. Note that the Phi will eventually get a second value for the +backedge, but we can't set it up yet (because it doesn't exist!). + +.. code-block:: c++ + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. 
Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + +Now the code starts to get more interesting. Our 'for' loop introduces a +new variable to the symbol table. This means that our symbol table can +now contain either function arguments or loop variables. To handle this, +before we codegen the body of the loop, we add the loop variable as the +current value for its name. Note that it is possible that there is a +variable of the same name in the outer scope. It would be easy to make +this an error (emit an error and return null if there is already an +entry for VarName) but we choose to allow shadowing of variables. In +order to handle this correctly, we remember the Value that we are +potentially shadowing in ``OldVal`` (which will be null if there is no +shadowed variable). + +Once the loop variable is set into the symbol table, the code +recursively codegen's the body. This allows the body to use the loop +variable: any references to it will naturally find it in the symbol +table. + +.. code-block:: c++ + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + +Now that the body is emitted, we compute the next value of the iteration +variable by adding the step value, or 1.0 if it isn't present. +'``NextVar``' will be the value of the loop variable on the next +iteration of the loop. + +.. code-block:: c++ + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. 
+ EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + +Finally, we evaluate the exit value of the loop, to determine whether +the loop should exit. This mirrors the condition evaluation for the +if/then/else statement. + +.. code-block:: c++ + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + +With the code for the body of the loop complete, we just need to finish +up the control flow for it. This code remembers the end block (for the +phi node), then creates the block for the loop exit ("afterloop"). Based +on the value of the exit condition, it creates a conditional branch that +chooses between executing the loop again and exiting the loop. Any +future code is emitted in the "afterloop" block, so it sets the +insertion position to it. + +.. code-block:: c++ + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); + } + +The final code handles various cleanups: now that we have the "NextVar" +value, we can add the incoming value to the loop PHI node. After that, +we remove the loop variable from the symbol table, so that it isn't in +scope after the for loop. Finally, code generation of the for loop +always returns 0.0, so that is what we return from +``ForExprAST::Codegen``. + +With this, we conclude the "adding control flow to Kaleidoscope" chapter +of the tutorial. 
In this chapter we added two control flow constructs, +and used them to motivate a couple of aspects of the LLVM IR that are +important for front-end implementors to know. In the next chapter of our +saga, we will get a bit crazier and add `user-defined +operators <LangImpl6.html>`_ to our poor innocent language. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +the if/then/else and for expressions. To build this example, use: + +.. code-block:: bash + + # Compile + clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy + # Run + ./toy + +Here is the code: + +.. code-block:: c++ + + #include "llvm/DerivedTypes.h" + #include "llvm/ExecutionEngine/ExecutionEngine.h" + #include "llvm/ExecutionEngine/JIT.h" + #include "llvm/IRBuilder.h" + #include "llvm/LLVMContext.h" + #include "llvm/Module.h" + #include "llvm/PassManager.h" + #include "llvm/Analysis/Verifier.h" + #include "llvm/Analysis/Passes.h" + #include "llvm/DataLayout.h" + #include "llvm/Transforms/Scalar.h" + #include "llvm/Support/TargetSelect.h" + #include + #include + #include + #include + using namespace llvm; + + //===----------------------------------------------------------------------===// + // Lexer + //===----------------------------------------------------------------------===// + + // The lexer returns tokens [0-255] if it is an unknown character, otherwise one + // of these for known things. + enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10 + }; + + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number + + /// gettok - Return the next token from standard input. + static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace.
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; + } + + //===----------------------------------------------------------------------===// + // Abstract Syntax Tree (aka Parse Tree) + //===----------------------------------------------------------------------===// + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". + class NumberExprAST : public ExprAST { + double Val; + public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); + }; + + /// VariableExprAST - Expression class for referencing a variable, like "a". 
+ class VariableExprAST : public ExprAST { + std::string Name; + public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); + }; + + /// BinaryExprAST - Expression class for a binary operator. + class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; + public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); + }; + + /// CallExprAST - Expression class for function calls. + class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; + public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); + }; + + /// IfExprAST - Expression class for if/then/else. + class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; + public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); + }; + + /// ForExprAST - Expression class for for/in. + class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; + public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); + }; + + /// PrototypeAST - This class represents the "prototype" for a function, + /// which captures its name, and its argument names (thus implicitly the number + /// of arguments the function takes). + class PrototypeAST { + std::string Name; + std::vector Args; + public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + + Function *Codegen(); + }; + + /// FunctionAST - This class represents a function definition itself. 
+ class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; + public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); + }; + + //===----------------------------------------------------------------------===// + // Parser + //===----------------------------------------------------------------------===// + + /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current + /// token the parser is looking at. getNextToken reads another token from the + /// lexer and updates CurTok with its results. + static int CurTok; + static int getNextToken() { + return CurTok = gettok(); + } + + /// BinopPrecedence - This holds the precedence for each binary operator that is + /// defined. + static std::map BinopPrecedence; + + /// GetTokPrecedence - Get the precedence of the pending binary operator token. + static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; + } + + /// Error* - These are little helper functions for error handling. + ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} + PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } + FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + + static ExprAST *ParseExpression(); + + /// identifierexpr + /// ::= identifier + /// ::= identifier '(' expression* ')' + static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. 
+ getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); + } + + /// numberexpr ::= number + static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; + } + + /// parenexpr ::= '(' expression ')' + static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; + } + + /// ifexpr ::= 'if' expression 'then' expression 'else' expression + static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); + } + + /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression + static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. 
+ + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); + } + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + /// ::= ifexpr + /// ::= forexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } + } + + /// binoprhs + /// ::= ('+' primary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. 
+ LHS = new BinaryExprAST(BinOp, LHS, RHS); + } + } + + /// expression + /// ::= primary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + + /// prototype + /// ::= id '(' id* ')' + static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); + } + + /// definition ::= 'def' prototype expression + static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; + } + + /// toplevelexpr ::= expression + static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; + } + + /// external ::= 'extern' prototype + static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. 
+ return ParsePrototype(); + } + + //===----------------------------------------------------------------------===// + // Code Generation + //===----------------------------------------------------------------------===// + + static Module *TheModule; + static IRBuilder<> Builder(getGlobalContext()); + static std::map NamedValues; + static FunctionPassManager *TheFPM; + + Value *ErrorV(const char *Str) { Error(Str); return 0; } + + Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); + } + + Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); + } + + Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } + } + + Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); + } + + Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. 
+ CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; + } + + Value *ForExprAST::Codegen() { + // Output this as: + // ... + // start = startexpr + // goto loop + // loop: + // variable = phi [start, loopheader], [nextvariable, loopend] + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // nextvariable = variable + step + // endcond = endexpr + // br endcond, loop, endloop + // outloop: + + // Emit the start code first, without 'variable' in scope. 
+ Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. 
+ BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); + } + + Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. 
+ NamedValues[Args[Idx]] = AI; + } + + return F; + } + + Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; + } + + //===----------------------------------------------------------------------===// + // Top-Level parsing and JIT Driver + //===----------------------------------------------------------------------===// + + static ExecutionEngine *TheExecutionEngine; + + static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. 
+ double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + /// top ::= definition | external | expression | ';' + static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } + } + + //===----------------------------------------------------------------------===// + // "Library" functions that can be "extern'd" from user code. + //===----------------------------------------------------------------------===// + + /// putchard - putchar that takes a double and returns 0. + extern "C" + double putchard(double X) { + putchar((char)X); + return 0; + } + + //===----------------------------------------------------------------------===// + // Main driver code. + //===----------------------------------------------------------------------===// + + int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. 
Start with registering info about how the + // target lays out data structures. + OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; + } + +`Next: Extending the language: user-defined operators `_ + diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html deleted file mode 100644 index 7cd87da79229..000000000000 --- a/docs/tutorial/LangImpl6.html +++ /dev/null @@ -1,1829 +0,0 @@ - - - - - Kaleidoscope: Extending the Language: User-defined Operators - - - - - - - -

Kaleidoscope: Extending the Language: User-defined Operators

- - - -
-

Written by Chris Lattner

-
- - -

Chapter 6 Introduction

- - -
- -

Welcome to Chapter 6 of the "Implementing a language -with LLVM" tutorial. At this point in our tutorial, we now have a fully -functional language that is fairly minimal, but also useful. There -is still one big problem with it, however. Our language doesn't have many -useful operators (like division, logical negation, or even any comparisons -besides less-than).

- -

This chapter of the tutorial takes a wild digression into adding user-defined -operators to the simple and beautiful Kaleidoscope language. This digression now gives -us a simple and ugly language in some ways, but also a powerful one at the same time. -One of the great things about creating your own language is that you get to -decide what is good or bad. In this tutorial we'll assume that it is okay to -use this as a way to show some interesting parsing techniques.

- -

At the end of this tutorial, we'll run through an example Kaleidoscope -application that renders the Mandelbrot set. This gives -an example of what you can build with Kaleidoscope and its feature set.

- -
- - -

User-defined Operators: the Idea

- - -
- -

-The "operator overloading" that we will add to Kaleidoscope is more general than -in languages like C++. In C++, you are only allowed to redefine existing -operators: you can't programmatically change the grammar, introduce new -operators, change precedence levels, etc. In this chapter, we will add this -capability to Kaleidoscope, which will let the user round out the set of -operators that are supported.

- -

The point of going into user-defined operators in a tutorial like this is to -show the power and flexibility of using a hand-written parser. Thus far, the parser -we have been implementing uses recursive descent for most parts of the grammar and -operator precedence parsing for the expressions. See Chapter 2 for details. Without using operator -precedence parsing, it would be very difficult to allow the programmer to -introduce new operators into the grammar: the grammar is dynamically extensible -as the JIT runs.

- -

The two specific features we'll add are programmable unary operators (right -now, Kaleidoscope has no unary operators at all) as well as binary operators. -An example of this is:

- -
-
-# Logical unary not.
-# Yields 0 when v is non-zero and 1 when v is zero.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-
-# Define > with the same precedence as <.
-# Implemented by swapping the operands of the built-in <.
-def binary> 10 (LHS RHS)
-  RHS < LHS;
-
-# Binary "logical or", (note that it does not "short circuit")
-# Yields 1 if either operand is non-zero, otherwise 0.
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Define = with slightly lower precedence than relationals.
-# Because | (5) binds more loosely than < and > (10), the body groups as
-# !((LHS < RHS) | (LHS > RHS)): equal means neither less nor greater.
-def binary= 9 (LHS RHS)
-  !(LHS < RHS | LHS > RHS);
-
-
- -

Many languages aspire to being able to implement their standard runtime -library in the language itself. In Kaleidoscope, we can implement significant -parts of the language in the library!

- -

We will break down implementation of these features into two parts: -implementing support for user-defined binary operators and adding unary -operators.

- -
- - -

User-defined Binary Operators

- - -
- -

Adding support for user-defined binary operators is pretty simple with our -current framework. We'll first add support for the unary/binary keywords:

- -
-
-enum Token {
-  ...
-  // operators
-  tok_binary = -11, tok_unary = -12
-};
-...
-static int gettok() {
-...
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    return tok_identifier;
-
-
- -

This just adds lexer support for the unary and binary keywords, like we -did in previous chapters. One nice thing -about our current AST is that we represent binary operators with full generalisation -by using their ASCII code as the opcode. For our extended operators, we'll use this -same representation, so we don't need any new AST or parser support.

- -

On the other hand, we have to be able to represent the definitions of these -new operators, in the "def binary| 5" part of the function definition. In our -grammar so far, the "name" for the function definition is parsed as the -"prototype" production and into the PrototypeAST AST node. To -represent our new user-defined operators as prototypes, we have to extend -the PrototypeAST AST node like this:

- -
-
-/// PrototypeAST - Describes a function's "prototype": its name, the names of
-/// its arguments, and whether it defines an operator (together with the
-/// precedence to use when that operator is binary).
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-  bool isOperator;      // True when this prototype defines an operator.
-  unsigned Precedence;  // Only meaningful for binary operators.
-
-  /// True if this is an operator prototype taking exactly NumArgs operands.
-  bool isOperatorOfArity(size_t NumArgs) const {
-    return isOperator && Args.size() == NumArgs;
-  }
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
-               bool isoperator = false, unsigned prec = 0)
-    : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-
-  bool isUnaryOp() const { return isOperatorOfArity(1); }
-  bool isBinaryOp() const { return isOperatorOfArity(2); }
-
-  /// The operator character, which is stored as the last character of Name.
-  /// Only valid for operator prototypes.
-  char getOperatorName() const {
-    assert(isUnaryOp() || isBinaryOp());
-    return *Name.rbegin();
-  }
-
-  unsigned getBinaryPrecedence() const { return Precedence; }
-
-  Function *Codegen();
-};
-
-
- -

Basically, in addition to knowing a name for the prototype, we now keep track -of whether it was an operator, and if it was, what precedence level the operator -is at. The precedence is only used for binary operators (as you'll see below, -it just doesn't apply for unary operators). Now that we have a way to represent -the prototype for a user-defined operator, we need to parse it:

- -
-
-/// prototype
-///   ::= id '(' id* ')'
-///   ::= binary LETTER number? '(' id id ')'
-///
-/// Parses a function prototype or a user-defined binary operator prototype.
-/// Argument names are separated by whitespace only (no commas). Returns the
-/// new PrototypeAST, or the result of ErrorP on a parse error.
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-  
-  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;  // Used when no precedence is written.
-  
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  case tok_binary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected binary operator");
-    // The function is named "binary" followed by the operator character,
-    // e.g. "binary@", so codegen can look it up by that name later.
-    FnName = "binary";
-    FnName += (char)CurTok;
-    Kind = 2;
-    getNextToken();
-    
-    // Read the precedence if present.
-    if (CurTok == tok_number) {
-      if (NumVal < 1 || NumVal > 100)
-        return ErrorP("Invalid precedence: must be 1..100");
-      BinaryPrecedence = (unsigned)NumVal;
-      getNextToken();
-    }
-    break;
-  }
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  // Collect consecutive identifiers as the argument names.
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  // Verify right number of names for operator. Kind doubles as the required
-  // operand count (1 = unary, 2 = binary); 0 means an ordinary function.
-  if (Kind && ArgNames.size() != Kind)
-    return ErrorP("Invalid number of operands for operator");
-  
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
-}
-
-
- -

This is all fairly straightforward parsing code, and we have already seen -a lot of similar code in the past. One interesting part about the code above is -the couple of lines that set up FnName for binary operators. This builds names -like "binary@" for a newly defined "@" operator. This then takes advantage of the -fact that symbol names in the LLVM symbol table are allowed to have any character in -them, including embedded nul characters.

- -

The next interesting thing to add, is codegen support for these binary operators. -Given our current structure, this is a simple addition of a default case for our -existing binary operator node:

- -
-
-/// Emits code for a binary expression. Built-in operators map directly onto
-/// IR instructions; any other operator becomes a call to the user-defined
-/// function named "binary" followed by the operator character.
-Value *BinaryExprAST::Codegen() {
-  // Both operands are always emitted (left first, then right) before either
-  // result is checked for failure.
-  Value *LHSV = LHS->Codegen();
-  Value *RHSV = RHS->Codegen();
-  if (!LHSV || !RHSV)
-    return 0;
-
-  switch (Op) {
-  case '+':
-    return Builder.CreateFAdd(LHSV, RHSV, "addtmp");
-  case '-':
-    return Builder.CreateFSub(LHSV, RHSV, "subtmp");
-  case '*':
-    return Builder.CreateFMul(LHSV, RHSV, "multmp");
-  case '<': {
-    Value *Cmp = Builder.CreateFCmpULT(LHSV, RHSV, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(Cmp, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  }
-  default:
-    break;
-  }
-
-  // Not a builtin, so it must be a user-defined operator: call its function.
-  Function *OpFn = TheModule->getFunction(std::string("binary")+Op);
-  assert(OpFn && "binary operator not found!");
-
-  Value *CallArgs[2] = { LHSV, RHSV };
-  return Builder.CreateCall(OpFn, CallArgs, "binop");
-}
-
-
-
- -

As you can see above, the new code is actually really simple. It just does -a lookup for the appropriate operator in the symbol table and generates a -function call to it. Since user-defined operators are just built as normal -functions (because the "prototype" boils down to a function with the right -name) everything falls into place.

- -

The final piece of code we are missing, is a bit of top-level magic:

- -
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // If this is an operator, install it.
-  if (Proto->isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body->Codegen()) {
-    ...
-
-
- -

Basically, before codegening a function, if it is a user-defined operator, we -register it in the precedence table. This allows the binary operator parsing -logic we already have in place to handle it. Since we are working on a fully-general operator precedence parser, this is all we need to do to "extend the grammar".

- -

Now we have useful user-defined binary operators. This builds a lot -on the previous framework we built for other operators. Adding unary operators -is a bit more challenging, because we don't have any framework for it yet - let's -see what it takes.

- -
- - -

User-defined Unary Operators

- - -
- -

Since we don't currently support unary operators in the Kaleidoscope -language, we'll need to add everything to support them. Above, we added simple -support for the 'unary' keyword to the lexer. In addition to that, we need an -AST node:

- -
-
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
-  char Opcode;       // The operator character.
-  ExprAST *Operand;  // The single sub-expression the operator applies to.
-public:
-  UnaryExprAST(char opcode, ExprAST *operand) 
-    : Opcode(opcode), Operand(operand) {}
-  // Defined out of line.
-  virtual Value *Codegen();
-};
-
-
- -

This AST node is very simple and obvious by now. It directly mirrors the -binary operator AST node, except that it only has one child. With this, we -need to add the parsing logic. Parsing a unary operator is pretty simple: we'll -add a new function to do it:

- -
-
-/// unary
-///   ::= primary
-///   ::= '!' unary
-///
-/// Parses zero or more prefix operator characters followed by a primary
-/// expression. Returns 0 if the operand fails to parse.
-static ExprAST *ParseUnary() {
-  // A non-ASCII token, '(' or ',' cannot be an operator, so whatever follows
-  // has to be a primary expression.
-  bool StartsPrimary = !isascii(CurTok) || CurTok == '(' || CurTok == ',';
-  if (StartsPrimary)
-    return ParsePrimary();
-
-  // Otherwise treat the current character as a unary operator and consume it.
-  int OpChar = CurTok;
-  getNextToken();
-
-  // The operand may itself begin with another unary operator (e.g. "!!x").
-  ExprAST *Inner = ParseUnary();
-  if (!Inner)
-    return 0;
-  return new UnaryExprAST(OpChar, Inner);
-}
-
-
- -

The grammar we add is pretty straightforward here. If we see a unary -operator when parsing a primary expression, we eat the operator as a prefix and -parse the remaining piece as another unary expression. This allows us to handle -multiple unary operators (e.g. "!!x"). Note that unary operators can't have -ambiguous parses like binary operators can, so there is no need for precedence -information.

- -

The problem with this function, is that we need to call ParseUnary from somewhere. -To do this, we change previous callers of ParsePrimary to call ParseUnary -instead:

- -
-
-/// binoprhs
-///   ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  ...
-    // Parse the unary expression after the binary operator.
-    ExprAST *RHS = ParseUnary();
-    if (!RHS) return 0;
-  ...
-}
-/// expression
-///   ::= unary binoprhs
-///
-/// Parses a leading unary expression, then hands it to ParseBinOpRHS to pick
-/// up any binary operators that follow. Returns 0 if the leading operand
-/// fails to parse.
-static ExprAST *ParseExpression() {
-  if (ExprAST *LHS = ParseUnary())
-    return ParseBinOpRHS(0, LHS);
-  return 0;
-}
-
-
- -

With these two simple changes, we are now able to parse unary operators and build the -AST for them. Next up, we need to add parser support for prototypes, to parse -the unary operator prototype. We extend the binary operator code above -with:

- -
-
-/// prototype
-///   ::= id '(' id* ')'
-///   ::= binary LETTER number? (id, id)
-///   ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-  
-  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;
-  
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  case tok_unary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected unary operator");
-    FnName = "unary";
-    FnName += (char)CurTok;
-    Kind = 1;
-    getNextToken();
-    break;
-  case tok_binary:
-    ...
-
-
- -

As with binary operators, we name unary operators with a name that includes -the operator character. This assists us at code generation time. Speaking of, -the final piece we need to add is codegen support for unary operators. It looks -like this:

- -
-
-/// Emits code for a unary expression as a call to the user-defined function
-/// named "unary" followed by the operator character. Returns 0 if the
-/// operand fails to codegen, or the result of ErrorV if no such function
-/// exists in the module.
-Value *UnaryExprAST::Codegen() {
-  Value *Arg = Operand->Codegen();
-  if (!Arg)
-    return 0;
-
-  // There are no built-in unary operators, so this is always a lookup.
-  if (Function *OpFn = TheModule->getFunction(std::string("unary")+Opcode))
-    return Builder.CreateCall(OpFn, Arg, "unop");
-
-  return ErrorV("Unknown unary operator");
-}
-
-
- -

This code is similar to, but simpler than, the code for binary operators. It -is simpler primarily because it doesn't need to handle any predefined operators. -

- -
- - -

Kicking the Tires

- - -
- -

It is somewhat hard to believe, but with a few simple extensions we've -covered in the last chapters, we have grown a real-ish language. With this, we -can do a lot of interesting things, including I/O, math, and a bunch of other -things. For example, we can now add a nice sequencing operator (printd is -defined to print out the specified value and a newline):

- -
-
-ready> extern printd(x);
-Read extern:
-declare double @printd(double)
-
-ready> def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.
-..
-ready> printd(123) : printd(456) : printd(789);
-123.000000
-456.000000
-789.000000
-Evaluated to 0.000000
-
-
- -

We can also define a bunch of other "primitive" operations, such as:

- -
-
-# Logical unary not.
-# Yields 0 when v is non-zero and 1 when v is zero.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-    
-# Unary negate.
-def unary-(v)
-  0-v;
-
-# Define > with the same precedence as <.
-# Implemented by swapping the operands of the built-in <.
-def binary> 10 (LHS RHS)
-  RHS < LHS;
-
-# Binary logical or, which does not short circuit.
-# Yields 1 if either operand is non-zero, otherwise 0.
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Binary logical and, which does not short circuit.
-# !!RHS normalizes a non-zero RHS to 1 and a zero RHS to 0.
-def binary& 6 (LHS RHS)
-  if !LHS then
-    0
-  else
-    !!RHS;
-
-# Define = with slightly lower precedence than relationals.
-# Because | (5) binds more loosely than < and > (10), the body groups as
-# !((LHS < RHS) | (LHS > RHS)): equal means neither less nor greater.
-def binary = 9 (LHS RHS)
-  !(LHS < RHS | LHS > RHS);
-
-# Define ':' for sequencing: a low-precedence operator that discards the value
-# of its LHS and just returns the RHS.
-def binary : 1 (x y) y;
-
-
- - -

Given the previous if/then/else support, we can also define interesting -functions for I/O. For example, the following prints out a character whose -"density" reflects the value passed in: the lower the value, the denser the -character:

- -
-
-ready>
-
-extern putchard(char)
-def printdensity(d)
-  if d > 8 then
-    putchard(32)  # ' '
-  else if d > 4 then
-    putchard(46)  # '.'
-  else if d > 2 then
-    putchard(43)  # '+'
-  else
-    putchard(42); # '*'
-...
-ready> printdensity(1): printdensity(2): printdensity(3):
-       printdensity(4): printdensity(5): printdensity(9):
-       putchard(10);
-**++.
-Evaluated to 0.000000
-
-
- -

Based on these simple primitive operations, we can start to define more -interesting things. For example, here's a little function that solves for the -number of iterations it takes a function in the complex plane to -converge:

- -
-
-# Determine whether the specific location diverges.
-# Solve for z = z^2 + c in the complex plane.
-# (real, imag) is the current z, (creal, cimag) is c, and iters counts the
-# steps taken so far. Recursion stops once iters exceeds 255 or
-# real*real + imag*imag exceeds 4, and the step count is returned.
-def mandleconverger(real imag iters creal cimag)
-  if iters > 255 | (real*real + imag*imag > 4) then
-    iters
-  else
-    mandleconverger(real*real - imag*imag + creal,
-                    2*real*imag + cimag,
-                    iters+1, creal, cimag);
-
-# Return the number of iterations required for the iteration to escape
-# Starts with z equal to c = (real, imag) and an iteration count of 0.
-def mandleconverge(real imag)
-  mandleconverger(real, imag, 0, real, imag);
-
-
- -

This "z = z^2 + c" function is a beautiful little -creature that is the basis for computation of -the Mandelbrot Set. -Our mandleconverge function returns the number of iterations that it -takes for a complex orbit to escape, saturating to 255. This is not a very -useful function by itself, but if you plot its value over a two-dimensional -plane, you can see the Mandelbrot set. Given that we are limited to using -putchard here, our amazing graphical output is limited, but we can whip together -something using the density plotter above:

- -
-
-# Compute and plot the Mandelbrot set with the specified 2 dimensional range
-# info.
-# The outer loop walks y from ymin while y < ymax in steps of ystep; the inner
-# loop does the same for x. Each point is drawn with printdensity, and each
-# row is ended with putchard(10), a newline.
-def mandelhelp(xmin xmax xstep   ymin ymax ystep)
-  for y = ymin, y < ymax, ystep in (
-    (for x = xmin, x < xmax, xstep in
-       printdensity(mandleconverge(x,y)))
-    : putchard(10)
-  )
- 
-# mandel - This is a convenient helper function for plotting the Mandelbrot set
-# from the specified position with the specified magnification.
-# The x range spans 78 steps of realmag and the y range spans 40 steps of
-# imagmag.
-def mandel(realstart imagstart realmag imagmag) 
-  mandelhelp(realstart, realstart+realmag*78, realmag,
-             imagstart, imagstart+imagmag*40, imagmag);
-
-
- -

Given this, we can try plotting out the Mandelbrot set! Let's try it out:

- -
-
-ready> mandel(-2.3, -1.3, 0.05, 0.07);
-*******************************+++++++++++*************************************
-*************************+++++++++++++++++++++++*******************************
-**********************+++++++++++++++++++++++++++++****************************
-*******************+++++++++++++++++++++.. ...++++++++*************************
-*****************++++++++++++++++++++++.... ...+++++++++***********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-**************+++++++++++++++++++++++....     ....+++++++++********************
-*************++++++++++++++++++++++......      .....++++++++*******************
-************+++++++++++++++++++++.......       .......+++++++******************
-***********+++++++++++++++++++....                ... .+++++++*****************
-**********+++++++++++++++++.......                     .+++++++****************
-*********++++++++++++++...........                    ...+++++++***************
-********++++++++++++............                      ...++++++++**************
-********++++++++++... ..........                        .++++++++**************
-*******+++++++++.....                                   .+++++++++*************
-*******++++++++......                                  ..+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******+++++......                                     ..+++++++++*************
-*******.... ....                                      ...+++++++++*************
-*******.... .                                         ...+++++++++*************
-*******+++++......                                    ...+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******++++++++......                                   .+++++++++*************
-*******+++++++++.....                                  ..+++++++++*************
-********++++++++++... ..........                        .++++++++**************
-********++++++++++++............                      ...++++++++**************
-*********++++++++++++++..........                     ...+++++++***************
-**********++++++++++++++++........                     .+++++++****************
-**********++++++++++++++++++++....                ... ..+++++++****************
-***********++++++++++++++++++++++.......       .......++++++++*****************
-************+++++++++++++++++++++++......      ......++++++++******************
-**************+++++++++++++++++++++++....      ....++++++++********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-*****************++++++++++++++++++++++....  ...++++++++***********************
-*******************+++++++++++++++++++++......++++++++*************************
-*********************++++++++++++++++++++++.++++++++***************************
-*************************+++++++++++++++++++++++*******************************
-******************************+++++++++++++************************************
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-Evaluated to 0.000000
-ready> mandel(-2, -1, 0.02, 0.04);
-**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
-***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
-*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
-*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
-***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
-**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
-************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
-***********++++++++++++++++++++++++++++++++++++++++++++++++++........        . 
-**********++++++++++++++++++++++++++++++++++++++++++++++.............          
-********+++++++++++++++++++++++++++++++++++++++++++..................          
-*******+++++++++++++++++++++++++++++++++++++++.......................          
-******+++++++++++++++++++++++++++++++++++...........................           
-*****++++++++++++++++++++++++++++++++............................              
-*****++++++++++++++++++++++++++++...............................               
-****++++++++++++++++++++++++++......   .........................               
-***++++++++++++++++++++++++.........     ......    ...........                 
-***++++++++++++++++++++++............                                          
-**+++++++++++++++++++++..............                                          
-**+++++++++++++++++++................                                          
-*++++++++++++++++++.................                                           
-*++++++++++++++++............ ...                                              
-*++++++++++++++..............                                                  
-*+++....++++................                                                   
-*..........  ...........                                                       
-*                                                                              
-*..........  ...........                                                       
-*+++....++++................                                                   
-*++++++++++++++..............                                                  
-*++++++++++++++++............ ...                                              
-*++++++++++++++++++.................                                           
-**+++++++++++++++++++................                                          
-**+++++++++++++++++++++..............                                          
-***++++++++++++++++++++++............                                          
-***++++++++++++++++++++++++.........     ......    ...........                 
-****++++++++++++++++++++++++++......   .........................               
-*****++++++++++++++++++++++++++++...............................               
-*****++++++++++++++++++++++++++++++++............................              
-******+++++++++++++++++++++++++++++++++++...........................           
-*******+++++++++++++++++++++++++++++++++++++++.......................          
-********+++++++++++++++++++++++++++++++++++++++++++..................          
-Evaluated to 0.000000
-ready> mandel(-0.9, -1.4, 0.02, 0.03);
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-**********+++++++++++++++++++++************************************************
-*+++++++++++++++++++++++++++++++++++++++***************************************
-+++++++++++++++++++++++++++++++++++++++++++++**********************************
-++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
-++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
-+++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
-+++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
-+++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
-++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
-+++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
-++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
-++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
-+++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
-++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
-++++++++++++++++++++...........                .........++++++++++++++++++++++*
-++++++++++++++++++............                  ...........++++++++++++++++++++
-++++++++++++++++...............                 .............++++++++++++++++++
-++++++++++++++.................                 ...............++++++++++++++++
-++++++++++++..................                  .................++++++++++++++
-+++++++++..................                      .................+++++++++++++
-++++++........        .                               .........  ..++++++++++++
-++............                                         ......    ....++++++++++
-..............                                                    ...++++++++++
-..............                                                    ....+++++++++
-..............                                                    .....++++++++
-.............                                                    ......++++++++
-...........                                                     .......++++++++
-.........                                                       ........+++++++
-.........                                                       ........+++++++
-.........                                                           ....+++++++
-........                                                             ...+++++++
-.......                                                              ...+++++++
-                                                                    ....+++++++
-                                                                   .....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-Evaluated to 0.000000
-ready> ^D
-
-
- -

At this point, you may be starting to realize that Kaleidoscope is a real -and powerful language. It may not be self-similar :), but it can be used to -plot things that are!

- -

With this, we conclude the "adding user-defined operators" chapter of the -tutorial. We have successfully augmented our language, adding the ability to extend the -language in the library, and we have shown how this can be used to build a simple but -interesting end-user application in Kaleidoscope. At this point, Kaleidoscope -can build a variety of applications that are functional and can call functions -with side-effects, but it can't actually define and mutate a variable itself. -

- -

Strikingly, variable mutation is an important feature of some -languages, and it is not at all obvious how to add -support for mutable variables without having to add an "SSA construction" -phase to your front-end. In the next chapter, we will describe how you can -add variable mutation without building SSA in your front-end.

- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with the -if/then/else and for expressions.. To build this example, use: -

- -
-
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-
-
- -

On some platforms, you will need to specify -rdynamic or -Wl,--export-dynamic -when linking. This ensures that symbols defined in the main executable are -exported to the dynamic linker and so are available for symbol resolution at -run time. This is not needed if you compile your support code into a shared -library, although doing that will cause problems on Windows.

- -

Here is the code:

- -
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5,
-  
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-  tok_for = -9, tok_in = -10,
-  
-  // operators
-  tok_binary = -11, tok_unary = -12
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
-  char Opcode;
-  ExprAST *Operand;
-public:
-  UnaryExprAST(char opcode, ExprAST *operand) 
-    : Opcode(opcode), Operand(operand) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector<ExprAST*> Args;
-public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-  : Cond(cond), Then(then), Else(_else) {}
-  virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
-  std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
-public:
-  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes), as well as if it is an operator.
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-  bool isOperator;
-  unsigned Precedence;  // Precedence if a binary op.
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
-               bool isoperator = false, unsigned prec = 0)
-  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-  
-  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
-  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
-  
-  char getOperatorName() const {
-    assert(isUnaryOp() || isBinaryOp());
-    return Name[Name.size()-1];
-  }
-  
-  unsigned getBinaryPrecedence() const { return Precedence; }
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map<char, int> BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec <= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector<ExprAST*> Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
-  getNextToken();  // eat the if.
-  
-  // condition.
-  ExprAST *Cond = ParseExpression();
-  if (!Cond) return 0;
-  
-  if (CurTok != tok_then)
-    return Error("expected then");
-  getNextToken();  // eat the then
-  
-  ExprAST *Then = ParseExpression();
-  if (Then == 0) return 0;
-  
-  if (CurTok != tok_else)
-    return Error("expected else");
-  
-  getNextToken();
-  
-  ExprAST *Else = ParseExpression();
-  if (!Else) return 0;
-  
-  return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
-  getNextToken();  // eat the for.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");
-  
-  std::string IdName = IdentifierStr;
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // eat '='.
-  
-  
-  ExprAST *Start = ParseExpression();
-  if (Start == 0) return 0;
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();
-  
-  ExprAST *End = ParseExpression();
-  if (End == 0) return 0;
-  
-  // The step value is optional.
-  ExprAST *Step = 0;
-  if (CurTok == ',') {
-    getNextToken();
-    Step = ParseExpression();
-    if (Step == 0) return 0;
-  }
-  
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-
-  return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();
-  case tok_for:        return ParseForExpr();
-  }
-}
-
-/// unary
-///   ::= primary
-///   ::= '!' unary
-static ExprAST *ParseUnary() {
-  // If the current token is not an operator, it must be a primary expr.
-  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
-    return ParsePrimary();
-  
-  // If this is a unary operator, read it.
-  int Opc = CurTok;
-  getNextToken();
-  if (ExprAST *Operand = ParseUnary())
-    return new UnaryExprAST(Opc, Operand);
-  return 0;
-}
-
-/// binoprhs
-///   ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec < ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the unary expression after the binary operator.
-    ExprAST *RHS = ParseUnary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParseUnary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-///   ::= binary LETTER number? (id, id)
-///   ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-  
-  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;
-  
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  case tok_unary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected unary operator");
-    FnName = "unary";
-    FnName += (char)CurTok;
-    Kind = 1;
-    getNextToken();
-    break;
-  case tok_binary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected binary operator");
-    FnName = "binary";
-    FnName += (char)CurTok;
-    Kind = 2;
-    getNextToken();
-    
-    // Read the precedence if present.
-    if (CurTok == tok_number) {
-      if (NumVal < 1 || NumVal > 100)
-        return ErrorP("Invalid precedecnce: must be 1..100");
-      BinaryPrecedence = (unsigned)NumVal;
-      getNextToken();
-    }
-    break;
-  }
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  // Verify right number of names for operator.
-  if (Kind && ArgNames.size() != Kind)
-    return ErrorP("Invalid number of operands for operator");
-  
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder<> Builder(getGlobalContext());
-static std::map<std::string, Value*> NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand->Codegen();
-  if (OperandV == 0) return 0;
-  
-  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
-  if (F == 0)
-    return ErrorV("Unknown unary operator");
-  
-  return Builder.CreateCall(F, OperandV, "unop");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '<':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: break;
-  }
-  
-  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
-  // a call to it.
-  Function *F = TheModule->getFunction(std::string("binary")+Op);
-  assert(F && "binary operator not found!");
-  
-  Value *Ops[2] = { L, R };
-  return Builder.CreateCall(F, Ops, "binop");
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF->arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector<Value*> ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (CondV == 0) return 0;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  CondV = Builder.CreateFCmpONE(CondV, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                "ifcond");
-  
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-  
-  // Create blocks for the then and else cases.  Insert the 'then' block at the
-  // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-  
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-  
-  // Emit then value.
-  Builder.SetInsertPoint(ThenBB);
-  
-  Value *ThenV = Then->Codegen();
-  if (ThenV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
-  ThenBB = Builder.GetInsertBlock();
-  
-  // Emit else block.
-  TheFunction->getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  
-  Value *ElseV = Else->Codegen();
-  if (ElseV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
-  ElseBB = Builder.GetInsertBlock();
-  
-  // Emit merge block.
-  TheFunction->getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                  "iftmp");
-  
-  PN->addIncoming(ThenV, ThenBB);
-  PN->addIncoming(ElseV, ElseBB);
-  return PN;
-}
-
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   ...
-  //   start = startexpr
-  //   goto loop
-  // loop: 
-  //   variable = phi [start, loopheader], [nextvariable, loopend]
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   nextvariable = variable + step
-  //   endcond = endexpr
-  //   br endcond, loop, endloop
-  // outloop:
-  
-  // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0) return 0;
-  
-  // Make the new basic block for the loop header, inserting after current
-  // block.
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-  
-  // Insert an explicit fall through from the current block to the LoopBB.
-  Builder.CreateBr(LoopBB);
-
-  // Start insertion in LoopBB.
-  Builder.SetInsertPoint(LoopBB);
-  
-  // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
-  Variable->addIncoming(StartVal, PreheaderBB);
-  
-  // Within the loop, the variable is defined equal to the PHI node.  If it
-  // shadows an existing variable, we have to restore it, so save it now.
-  Value *OldVal = NamedValues[VarName];
-  NamedValues[VarName] = Variable;
-  
-  // Emit the body of the loop.  This, like any other expr, can change the
-  // current BB.  Note that we ignore the value computed by the body, but don't
-  // allow an error.
-  if (Body->Codegen() == 0)
-    return 0;
-  
-  // Emit the step value.
-  Value *StepVal;
-  if (Step) {
-    StepVal = Step->Codegen();
-    if (StepVal == 0) return 0;
-  } else {
-    // If not specified, use 1.0.
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  }
-  
-  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-
-  // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  EndCond = Builder.CreateFCmpONE(EndCond, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                  "loopcond");
-  
-  // Create the "after loop" block and insert it.
-  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-  
-  // Insert the conditional branch into the end of LoopEndBB.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-  
-  // Any new code will be inserted in AfterBB.
-  Builder.SetInsertPoint(AfterBB);
-  
-  // Add a new entry to the PHI node for the backedge.
-  Variable->addIncoming(NextVar, LoopEndBB);
-  
-  // Restore the unshadowed variable.
-  if (OldVal)
-    NamedValues[VarName] = OldVal;
-  else
-    NamedValues.erase(VarName);
-
-  
-  // for expr always returns 0.0.
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector<Type*> Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI->setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // If this is an operator, install it.
-  if (Proto->isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body->Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM->run(*TheFunction);
-    
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction->eraseFromParent();
-
-  if (Proto->isBinaryOp())
-    BinopPrecedence.erase(Proto->getOperatorName());
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F->dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready> ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-/// printd - printf that takes a double prints it as "%f\n", returning 0.
-extern "C" 
-double printd(double X) {
-  printf("%f\n", X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['<'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready> ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
-  return 0;
-}
-
-
- -Next: Extending the language: mutable variables / SSA construction -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/LangImpl6.rst b/docs/tutorial/LangImpl6.rst new file mode 100644 index 000000000000..a5a60bffe04a --- /dev/null +++ b/docs/tutorial/LangImpl6.rst @@ -0,0 +1,1726 @@ +============================================================ +Kaleidoscope: Extending the Language: User-defined Operators +============================================================ + +.. contents:: + :local: + +Chapter 6 Introduction +====================== + +Welcome to Chapter 6 of the "`Implementing a language with +LLVM <index.html>`_" tutorial. At this point in our tutorial, we now +have a fully functional language that is fairly minimal, but also +useful. There is still one big problem with it, however. Our language +doesn't have many useful operators (like division, logical negation, or +even any comparisons besides less-than). + +This chapter of the tutorial takes a wild digression into adding +user-defined operators to the simple and beautiful Kaleidoscope +language. This digression now gives us a simple and ugly language in +some ways, but also a powerful one at the same time. One of the great +things about creating your own language is that you get to decide what +is good or bad. In this tutorial we'll assume that it is okay to use +this as a way to show some interesting parsing techniques. + +At the end of this tutorial, we'll run through an example Kaleidoscope +application that `renders the Mandelbrot set <#example>`_. This gives an +example of what you can build with Kaleidoscope and its feature set. + +User-defined Operators: the Idea +================================ + +The "operator overloading" that we will add to Kaleidoscope is more +general than languages like C++. In C++, you are only allowed to +redefine existing operators: you can't programmatically change the +grammar, introduce new operators, change precedence levels, etc.
In this +chapter, we will add this capability to Kaleidoscope, which will let the +user round out the set of operators that are supported. + +The point of going into user-defined operators in a tutorial like this +is to show the power and flexibility of using a hand-written parser. +Thus far, the parser we have been implementing uses recursive descent +for most parts of the grammar and operator precedence parsing for the +expressions. See `Chapter 2 <LangImpl2.html>`_ for details. Without +using operator precedence parsing, it would be very difficult to allow +the programmer to introduce new operators into the grammar: the grammar +is dynamically extensible as the JIT runs. + +The two specific features we'll add are programmable unary operators +(right now, Kaleidoscope has no unary operators at all) as well as +binary operators. An example of this is: + +:: + + # Logical unary not. + def unary!(v) + if v then + 0 + else + 1; + + # Define > with the same precedence as <. + def binary> 10 (LHS RHS) + RHS < LHS; + + # Binary "logical or", (note that it does not "short circuit") + def binary| 5 (LHS RHS) + if LHS then + 1 + else if RHS then + 1 + else + 0; + + # Define = with slightly lower precedence than relationals. + def binary= 9 (LHS RHS) + !(LHS < RHS | LHS > RHS); + +Many languages aspire to being able to implement their standard runtime +library in the language itself. In Kaleidoscope, we can implement +significant parts of the language in the library! + +We will break down implementation of these features into two parts: +implementing support for user-defined binary operators and adding unary +operators. + +User-defined Binary Operators +============================= + +Adding support for user-defined binary operators is pretty simple with +our current framework. We'll first add support for the unary/binary +keywords: + +.. code-block:: c++ + + enum Token { + ... + // operators + tok_binary = -11, tok_unary = -12 + }; + ... + static int gettok() { + ...
+ if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + return tok_identifier; + +This just adds lexer support for the unary and binary keywords, like we +did in `previous chapters <LangImpl5.html>`_. One nice thing +about our current AST, is that we represent binary operators with full +generalisation by using their ASCII code as the opcode. For our extended +operators, we'll use this same representation, so we don't need any new +AST or parser support. + +On the other hand, we have to be able to represent the definitions of +these new operators, in the "def binary\| 5" part of the function +definition. In our grammar so far, the "name" for the function +definition is parsed as the "prototype" production and into the +``PrototypeAST`` AST node. To represent our new user-defined operators +as prototypes, we have to extend the ``PrototypeAST`` AST node like +this: + +.. code-block:: c++ + + /// PrototypeAST - This class represents the "prototype" for a function, + /// which captures its argument names as well as if it is an operator. + class PrototypeAST { + std::string Name; + std::vector Args; + bool isOperator; + unsigned Precedence; // Precedence if a binary op.
+ public: + PrototypeAST(const std::string &name, const std::vector &args, + bool isoperator = false, unsigned prec = 0) + : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} + + bool isUnaryOp() const { return isOperator && Args.size() == 1; } + bool isBinaryOp() const { return isOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size()-1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } + + Function *Codegen(); + }; + +Basically, in addition to knowing a name for the prototype, we now keep +track of whether it was an operator, and if it was, what precedence +level the operator is at. The precedence is only used for binary +operators (as you'll see below, it just doesn't apply for unary +operators). Now that we have a way to represent the prototype for a +user-defined operator, we need to parse it: + +.. code-block:: c++ + + /// prototype + /// ::= id '(' id* ')' + /// ::= binary LETTER number? (id, id) + static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. 
+ if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return ErrorP("Invalid precedecnce: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return ErrorP("Invalid number of operands for operator"); + + return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); + } + +This is all fairly straightforward parsing code, and we have already +seen a lot of similar code in the past. One interesting part about the +code above is the couple lines that set up ``FnName`` for binary +operators. This builds names like "binary@" for a newly defined "@" +operator. This then takes advantage of the fact that symbol names in the +LLVM symbol table are allowed to have any character in them, including +embedded nul characters. + +The next interesting thing to add, is codegen support for these binary +operators. Given our current structure, this is a simple addition of a +default case for our existing binary operator node: + +.. code-block:: c++ + + Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. 
Emit + // a call to it. + Function *F = TheModule->getFunction(std::string("binary")+Op); + assert(F && "binary operator not found!"); + + Value *Ops[2] = { L, R }; + return Builder.CreateCall(F, Ops, "binop"); + } + +As you can see above, the new code is actually really simple. It just +does a lookup for the appropriate operator in the symbol table and +generates a function call to it. Since user-defined operators are just +built as normal functions (because the "prototype" boils down to a +function with the right name) everything falls into place. + +The final piece of code we are missing, is a bit of top-level magic: + +.. code-block:: c++ + + Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + ... + +Basically, before codegening a function, if it is a user-defined +operator, we register it in the precedence table. This allows the binary +operator parsing logic we already have in place to handle it. Since we +are working on a fully-general operator precedence parser, this is all +we need to do to "extend the grammar". + +Now we have useful user-defined binary operators. This builds a lot on +the previous framework we built for other operators. Adding unary +operators is a bit more challenging, because we don't have any framework +for it yet - let's see what it takes. + +User-defined Unary Operators +============================ + +Since we don't currently support unary operators in the Kaleidoscope +language, we'll need to add everything to support them. Above, we added +simple support for the 'unary' keyword to the lexer.
In addition to +that, we need an AST node: + +.. code-block:: c++ + + /// UnaryExprAST - Expression class for a unary operator. + class UnaryExprAST : public ExprAST { + char Opcode; + ExprAST *Operand; + public: + UnaryExprAST(char opcode, ExprAST *operand) + : Opcode(opcode), Operand(operand) {} + virtual Value *Codegen(); + }; + +This AST node is very simple and obvious by now. It directly mirrors the +binary operator AST node, except that it only has one child. With this, +we need to add the parsing logic. Parsing a unary operator is pretty +simple: we'll add a new function to do it: + +.. code-block:: c++ + + /// unary + /// ::= primary + /// ::= '!' unary + static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; + } + +The grammar we add is pretty straightforward here. If we see a unary +operator when parsing a primary operator, we eat the operator as a +prefix and parse the remaining piece as another unary operator. This +allows us to handle multiple unary operators (e.g. "!!x"). Note that +unary operators can't have ambiguous parses like binary operators can, +so there is no need for precedence information. + +The problem with this function, is that we need to call ParseUnary from +somewhere. To do this, we change previous callers of ParsePrimary to +call ParseUnary instead: + +.. code-block:: c++ + + /// binoprhs + /// ::= ('+' unary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + ... + // Parse the unary expression after the binary operator. + ExprAST *RHS = ParseUnary(); + if (!RHS) return 0; + ... 
+ } + /// expression + /// ::= unary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParseUnary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + +With these two simple changes, we are now able to parse unary operators +and build the AST for them. Next up, we need to add parser support for +prototypes, to parse the unary operator prototype. We extend the binary +operator code above with: + +.. code-block:: c++ + + /// prototype + /// ::= id '(' id* ')' + /// ::= binary LETTER number? (id, id) + /// ::= unary LETTER (id) + static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + ... + +As with binary operators, we name unary operators with a name that +includes the operator character. This assists us at code generation +time. Speaking of, the final piece we need to add is codegen support for +unary operators. It looks like this: + +.. code-block:: c++ + + Value *UnaryExprAST::Codegen() { + Value *OperandV = Operand->Codegen(); + if (OperandV == 0) return 0; + + Function *F = TheModule->getFunction(std::string("unary")+Opcode); + if (F == 0) + return ErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); + } + +This code is similar to, but simpler than, the code for binary +operators. It is simpler primarily because it doesn't need to handle any +predefined operators. 
+ +Kicking the Tires +================= + +It is somewhat hard to believe, but with a few simple extensions we've +covered in the last chapters, we have grown a real-ish language. With +this, we can do a lot of interesting things, including I/O, math, and a +bunch of other things. For example, we can now add a nice sequencing +operator (printd is defined to print out the specified value and a +newline): + +:: + + ready> extern printd(x); + Read extern: + declare double @printd(double) + + ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands. + .. + ready> printd(123) : printd(456) : printd(789); + 123.000000 + 456.000000 + 789.000000 + Evaluated to 0.000000 + +We can also define a bunch of other "primitive" operations, such as: + +:: + + # Logical unary not. + def unary!(v) + if v then + 0 + else + 1; + + # Unary negate. + def unary-(v) + 0-v; + + # Define > with the same precedence as <. + def binary> 10 (LHS RHS) + RHS < LHS; + + # Binary logical or, which does not short circuit. + def binary| 5 (LHS RHS) + if LHS then + 1 + else if RHS then + 1 + else + 0; + + # Binary logical and, which does not short circuit. + def binary& 6 (LHS RHS) + if !LHS then + 0 + else + !!RHS; + + # Define = with slightly lower precedence than relationals. + def binary = 9 (LHS RHS) + !(LHS < RHS | LHS > RHS); + + # Define ':' for sequencing: as a low-precedence operator that ignores operands + # and just returns the RHS. + def binary : 1 (x y) y; + +Given the previous if/then/else support, we can also define interesting +functions for I/O. For example, the following prints out a character +whose "density" reflects the value passed in: the lower the value, the +denser the character: + +:: + + ready> + + extern putchard(char) + def printdensity(d) + if d > 8 then + putchard(32) # ' ' + else if d > 4 then + putchard(46) # '.' + else if d > 2 then + putchard(43) # '+' + else + putchard(42); # '*' + ... 
+ ready> printdensity(1): printdensity(2): printdensity(3): + printdensity(4): printdensity(5): printdensity(9): + putchard(10); + **++. + Evaluated to 0.000000 + +Based on these simple primitive operations, we can start to define more +interesting things. For example, here's a little function that solves +for the number of iterations it takes a function in the complex plane to +converge: + +:: + + # Determine whether the specific location diverges. + # Solve for z = z^2 + c in the complex plane. + def mandleconverger(real imag iters creal cimag) + if iters > 255 | (real*real + imag*imag > 4) then + iters + else + mandleconverger(real*real - imag*imag + creal, + 2*real*imag + cimag, + iters+1, creal, cimag); + + # Return the number of iterations required for the iteration to escape + def mandleconverge(real imag) + mandleconverger(real, imag, 0, real, imag); + +This "``z = z^2 + c``" function is a beautiful little creature that is +the basis for computation of the `Mandelbrot +Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_. Our +``mandleconverge`` function returns the number of iterations that it +takes for a complex orbit to escape, saturating to 255. This is not a +very useful function by itself, but if you plot its value over a +two-dimensional plane, you can see the Mandelbrot set. Given that we are +limited to using putchard here, our amazing graphical output is limited, +but we can whip together something using the density plotter above: + +:: + + # Compute and plot the mandlebrot set with the specified 2 dimensional range + # info. + def mandelhelp(xmin xmax xstep ymin ymax ystep) + for y = ymin, y < ymax, ystep in ( + (for x = xmin, x < xmax, xstep in + printdensity(mandleconverge(x,y))) + : putchard(10) + ) + + # mandel - This is a convenient helper function for plotting the mandelbrot set + # from the specified position with the specified Magnification.
+ def mandel(realstart imagstart realmag imagmag) + mandelhelp(realstart, realstart+realmag*78, realmag, + imagstart, imagstart+imagmag*40, imagmag); + +Given this, we can try plotting out the Mandelbrot set! Let's try it out: + +:: + + ready> mandel(-2.3, -1.3, 0.05, 0.07); + *******************************+++++++++++************************************* + *************************+++++++++++++++++++++++******************************* + **********************+++++++++++++++++++++++++++++**************************** + *******************+++++++++++++++++++++.. ...++++++++************************* + *****************++++++++++++++++++++++.... ...+++++++++*********************** + ***************+++++++++++++++++++++++..... ...+++++++++********************* + **************+++++++++++++++++++++++.... ....+++++++++******************** + *************++++++++++++++++++++++...... .....++++++++******************* + ************+++++++++++++++++++++....... .......+++++++****************** + ***********+++++++++++++++++++.... ... .+++++++***************** + **********+++++++++++++++++....... .+++++++**************** + *********++++++++++++++........... ...+++++++*************** + ********++++++++++++............ ...++++++++************** + ********++++++++++... .......... .++++++++************** + *******+++++++++..... .+++++++++************* + *******++++++++...... ..+++++++++************* + *******++++++....... ..+++++++++************* + *******+++++...... ..+++++++++************* + *******.... .... ...+++++++++************* + *******.... . ...+++++++++************* + *******+++++...... ...+++++++++************* + *******++++++....... ..+++++++++************* + *******++++++++...... .+++++++++************* + *******+++++++++..... ..+++++++++************* + ********++++++++++... .......... .++++++++************** + ********++++++++++++............ ...++++++++************** + *********++++++++++++++.......... ...+++++++*************** + **********++++++++++++++++........
.+++++++**************** + **********++++++++++++++++++++.... ... ..+++++++**************** + ***********++++++++++++++++++++++....... .......++++++++***************** + ************+++++++++++++++++++++++...... ......++++++++****************** + **************+++++++++++++++++++++++.... ....++++++++******************** + ***************+++++++++++++++++++++++..... ...+++++++++********************* + *****************++++++++++++++++++++++.... ...++++++++*********************** + *******************+++++++++++++++++++++......++++++++************************* + *********************++++++++++++++++++++++.++++++++*************************** + *************************+++++++++++++++++++++++******************************* + ******************************+++++++++++++************************************ + ******************************************************************************* + ******************************************************************************* + ******************************************************************************* + Evaluated to 0.000000 + ready> mandel(-2, -1, 0.02, 0.04); + **************************+++++++++++++++++++++++++++++++++++++++++++++++++++++ + ***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++. + *******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++... + *****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++..... + ***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........ + **************++++++++++++++++++++++++++++++++++++++++++++++++++++++........... + ************+++++++++++++++++++++++++++++++++++++++++++++++++++++.............. + ***********++++++++++++++++++++++++++++++++++++++++++++++++++........ . + **********++++++++++++++++++++++++++++++++++++++++++++++............. + ********+++++++++++++++++++++++++++++++++++++++++++.................. 
+ *******+++++++++++++++++++++++++++++++++++++++....................... + ******+++++++++++++++++++++++++++++++++++........................... + *****++++++++++++++++++++++++++++++++............................ + *****++++++++++++++++++++++++++++............................... + ****++++++++++++++++++++++++++...... ......................... + ***++++++++++++++++++++++++......... ...... ........... + ***++++++++++++++++++++++............ + **+++++++++++++++++++++.............. + **+++++++++++++++++++................ + *++++++++++++++++++................. + *++++++++++++++++............ ... + *++++++++++++++.............. + *+++....++++................ + *.......... ........... + * + *.......... ........... + *+++....++++................ + *++++++++++++++.............. + *++++++++++++++++............ ... + *++++++++++++++++++................. + **+++++++++++++++++++................ + **+++++++++++++++++++++.............. + ***++++++++++++++++++++++............ + ***++++++++++++++++++++++++......... ...... ........... + ****++++++++++++++++++++++++++...... ......................... + *****++++++++++++++++++++++++++++............................... + *****++++++++++++++++++++++++++++++++............................ + ******+++++++++++++++++++++++++++++++++++........................... + *******+++++++++++++++++++++++++++++++++++++++....................... + ********+++++++++++++++++++++++++++++++++++++++++++.................. 
+ Evaluated to 0.000000 + ready> mandel(-0.9, -1.4, 0.02, 0.03); + ******************************************************************************* + ******************************************************************************* + ******************************************************************************* + **********+++++++++++++++++++++************************************************ + *+++++++++++++++++++++++++++++++++++++++*************************************** + +++++++++++++++++++++++++++++++++++++++++++++********************************** + ++++++++++++++++++++++++++++++++++++++++++++++++++***************************** + ++++++++++++++++++++++++++++++++++++++++++++++++++++++************************* + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++********************** + +++++++++++++++++++++++++++++++++.........++++++++++++++++++******************* + +++++++++++++++++++++++++++++++.... ......+++++++++++++++++++**************** + +++++++++++++++++++++++++++++....... ........+++++++++++++++++++************** + ++++++++++++++++++++++++++++........ ........++++++++++++++++++++************ + +++++++++++++++++++++++++++......... .. ...+++++++++++++++++++++********** + ++++++++++++++++++++++++++........... ....++++++++++++++++++++++******** + ++++++++++++++++++++++++............. .......++++++++++++++++++++++****** + +++++++++++++++++++++++............. ........+++++++++++++++++++++++**** + ++++++++++++++++++++++........... ..........++++++++++++++++++++++*** + ++++++++++++++++++++........... .........++++++++++++++++++++++* + ++++++++++++++++++............ ...........++++++++++++++++++++ + ++++++++++++++++............... .............++++++++++++++++++ + ++++++++++++++................. ...............++++++++++++++++ + ++++++++++++.................. .................++++++++++++++ + +++++++++.................. .................+++++++++++++ + ++++++........ . ......... ..++++++++++++ + ++............ ...... ....++++++++++ + .............. 
...++++++++++ + .............. ....+++++++++ + .............. .....++++++++ + ............. ......++++++++ + ........... .......++++++++ + ......... ........+++++++ + ......... ........+++++++ + ......... ....+++++++ + ........ ...+++++++ + ....... ...+++++++ + ....+++++++ + .....+++++++ + ....+++++++ + ....+++++++ + ....+++++++ + Evaluated to 0.000000 + ready> ^D + +At this point, you may be starting to realize that Kaleidoscope is a +real and powerful language. It may not be self-similar :), but it can be +used to plot things that are! + +With this, we conclude the "adding user-defined operators" chapter of +the tutorial. We have successfully augmented our language, adding the +ability to extend the language in the library, and we have shown how +this can be used to build a simple but interesting end-user application +in Kaleidoscope. At this point, Kaleidoscope can build a variety of +applications that are functional and can call functions with +side-effects, but it can't actually define and mutate a variable itself. + +Strikingly, variable mutation is an important feature of some languages, +and it is not at all obvious how to `add support for mutable +variables <LangImpl7.html>`_ without having to add an "SSA construction" +phase to your front-end. In the next chapter, we will describe how you +can add variable mutation without building SSA in your front-end. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +the support for user-defined operators. To build this example, use: + +.. code-block:: bash + + # Compile + clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy + # Run + ./toy + +On some platforms, you will need to specify -rdynamic or +-Wl,--export-dynamic when linking. This ensures that symbols defined in +the main executable are exported to the dynamic linker and so are +available for symbol resolution at run time.
This is not needed if you +compile your support code into a shared library, although doing that +will cause problems on Windows. + +Here is the code: + +.. code-block:: c++ + + #include "llvm/DerivedTypes.h" + #include "llvm/ExecutionEngine/ExecutionEngine.h" + #include "llvm/ExecutionEngine/JIT.h" + #include "llvm/IRBuilder.h" + #include "llvm/LLVMContext.h" + #include "llvm/Module.h" + #include "llvm/PassManager.h" + #include "llvm/Analysis/Verifier.h" + #include "llvm/Analysis/Passes.h" + #include "llvm/DataLayout.h" + #include "llvm/Transforms/Scalar.h" + #include "llvm/Support/TargetSelect.h" + #include + #include + #include + #include + using namespace llvm; + + //===----------------------------------------------------------------------===// + // Lexer + //===----------------------------------------------------------------------===// + + // The lexer returns tokens [0-255] if it is an unknown character, otherwise one + // of these for known things. + enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10, + + // operators + tok_binary = -11, tok_unary = -12 + }; + + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number + + /// gettok - Return the next token from standard input. + static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. 
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; + } + + //===----------------------------------------------------------------------===// + // Abstract Syntax Tree (aka Parse Tree) + //===----------------------------------------------------------------------===// + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". 
+    class NumberExprAST : public ExprAST {
+      double Val;  // The literal's value, fixed at construction.
+    public:
+      NumberExprAST(double val) : Val(val) {}
+      virtual Value *Codegen();  // Defined out of line in the codegen section.
+    };
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;  // Name used to look the variable up at codegen time.
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+      virtual Value *Codegen();
+    };
+
+    /// UnaryExprAST - Expression class for a unary operator.
+    class UnaryExprAST : public ExprAST {
+      char Opcode;       // The operator character.
+      ExprAST *Operand;  // Expression the operator is applied to.
+    public:
+      UnaryExprAST(char opcode, ExprAST *operand)
+        : Opcode(opcode), Operand(operand) {}
+      virtual Value *Codegen();
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;             // The operator character.
+      ExprAST *LHS, *RHS;  // Left- and right-hand operand expressions.
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+      virtual Value *Codegen();
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;          // Name of the function being called.
+      std::vector<ExprAST*> Args;  // Argument expressions, in call order.
+    public:
+      // The vector of pointers is copied; the argument nodes themselves are
+      // shared with the caller's vector, not cloned.
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+      virtual Value *Codegen();
+    };
+
+    /// IfExprAST - Expression class for if/then/else.
+    class IfExprAST : public ExprAST {
+      ExprAST *Cond, *Then, *Else;  // Condition and the two branch expressions.
+    public:
+      IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+        : Cond(cond), Then(then), Else(_else) {}
+      virtual Value *Codegen();
+    };
+
+    /// ForExprAST - Expression class for for/in.
+    class ForExprAST : public ExprAST {
+      std::string VarName;  // Name of the loop induction variable.
+      // Step may be null: the parser passes 0 when the optional step is omitted.
+      ExprAST *Start, *End, *Step, *Body;
+    public:
+      ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+                 ExprAST *step, ExprAST *body)
+        : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+      virtual Value *Codegen();
+    };
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes), as well as if it is an operator.
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;  // Argument names, in declaration order.
+      bool isOperator;
+      unsigned Precedence;  // Precedence if a binary op.
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+                   bool isoperator = false, unsigned prec = 0)
+      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+      // An operator prototype is unary or binary according to its operand count.
+      bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+      bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+      // Returns the last character of Name; only valid for operator prototypes.
+      char getOperatorName() const {
+        assert(isUnaryOp() || isBinaryOp());
+        return Name[Name.size()-1];
+      }
+
+      unsigned getBinaryPrecedence() const { return Precedence; }
+
+      Function *Codegen();  // Defined out of line in the codegen section.
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;  // The function's signature.
+      ExprAST *Body;        // The expression computed as the function's result.
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+
+      Function *Codegen();
+    };
+
+    //===----------------------------------------------------------------------===//
+    // Parser
+    //===----------------------------------------------------------------------===//
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+ static int CurTok; + static int getNextToken() { + return CurTok = gettok(); + } + + /// BinopPrecedence - This holds the precedence for each binary operator that is + /// defined. + static std::map BinopPrecedence; + + /// GetTokPrecedence - Get the precedence of the pending binary operator token. + static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; + } + + /// Error* - These are little helper functions for error handling. + ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} + PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } + FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + + static ExprAST *ParseExpression(); + + /// identifierexpr + /// ::= identifier + /// ::= identifier '(' expression* ')' + static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); + } + + /// numberexpr ::= number + static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; + } + + /// parenexpr ::= '(' expression ')' + static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). 
+ return V; + } + + /// ifexpr ::= 'if' expression 'then' expression 'else' expression + static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); + } + + /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression + static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. 
+ + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); + } + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + /// ::= ifexpr + /// ::= forexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } + } + + /// unary + /// ::= primary + /// ::= '!' unary + static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; + } + + /// binoprhs + /// ::= ('+' unary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + ExprAST *RHS = ParseUnary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. 
+ LHS = new BinaryExprAST(BinOp, LHS, RHS); + } + } + + /// expression + /// ::= unary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParseUnary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + + /// prototype + /// ::= id '(' id* ')' + /// ::= binary LETTER number? (id, id) + /// ::= unary LETTER (id) + static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return ErrorP("Invalid precedecnce: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return ErrorP("Invalid number of operands for operator"); + + return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); + } + + /// definition ::= 'def' prototype expression + static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. 
+ PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; + } + + /// toplevelexpr ::= expression + static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; + } + + /// external ::= 'extern' prototype + static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); + } + + //===----------------------------------------------------------------------===// + // Code Generation + //===----------------------------------------------------------------------===// + + static Module *TheModule; + static IRBuilder<> Builder(getGlobalContext()); + static std::map NamedValues; + static FunctionPassManager *TheFPM; + + Value *ErrorV(const char *Str) { Error(Str); return 0; } + + Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); + } + + Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? 
V : ErrorV("Unknown variable name"); + } + + Value *UnaryExprAST::Codegen() { + Value *OperandV = Operand->Codegen(); + if (OperandV == 0) return 0; + + Function *F = TheModule->getFunction(std::string("unary")+Opcode); + if (F == 0) + return ErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); + } + + Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = TheModule->getFunction(std::string("binary")+Op); + assert(F && "binary operator not found!"); + + Value *Ops[2] = { L, R }; + return Builder.CreateCall(F, Ops, "binop"); + } + + Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); + } + + Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. 
+ CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; + } + + Value *ForExprAST::Codegen() { + // Output this as: + // ... + // start = startexpr + // goto loop + // loop: + // variable = phi [start, loopheader], [nextvariable, loopend] + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // nextvariable = variable + step + // endcond = endexpr + // br endcond, loop, endloop + // outloop: + + // Emit the start code first, without 'variable' in scope. 
+ Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. 
+ BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); + } + + Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. 
+ NamedValues[Args[Idx]] = AI; + } + + return F; + } + + Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; + } + + //===----------------------------------------------------------------------===// + // Top-Level parsing and JIT Driver + //===----------------------------------------------------------------------===// + + static ExecutionEngine *TheExecutionEngine; + + static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. 
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + /// top ::= definition | external | expression | ';' + static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } + } + + //===----------------------------------------------------------------------===// + // "Library" functions that can be "extern'd" from user code. + //===----------------------------------------------------------------------===// + + /// putchard - putchar that takes a double and returns 0. + extern "C" + double putchard(double X) { + putchar((char)X); + return 0; + } + + /// printd - printf that takes a double prints it as "%f\n", returning 0. + extern "C" + double printd(double X) { + printf("%f\n", X); + return 0; + } + + //===----------------------------------------------------------------------===// + // Main driver code. + //===----------------------------------------------------------------------===// + + int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. 
+ std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; + } + +`Next: Extending the language: mutable variables / SSA +construction `_ + diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html deleted file mode 100644 index 4d5a4aa7e84a..000000000000 --- a/docs/tutorial/LangImpl7.html +++ /dev/null @@ -1,2164 +0,0 @@ - - - - - Kaleidoscope: Extending the Language: Mutable Variables / SSA - construction - - - - - - - -

Kaleidoscope: Extending the Language: Mutable Variables

- - - -
-

Written by Chris Lattner

-
- - -

Chapter 7 Introduction

- - -
- -

Welcome to Chapter 7 of the "Implementing a language -with LLVM" tutorial. In chapters 1 through 6, we've built a very -respectable, albeit simple, functional -programming language. In our journey, we learned some parsing techniques, -how to build and represent an AST, how to build LLVM IR, and how to optimize -the resultant code as well as JIT compile it.

- -

While Kaleidoscope is interesting as a functional language, the fact that it -is functional makes it "too easy" to generate LLVM IR for it. In particular, a -functional language makes it very easy to build LLVM IR directly in SSA form. -Since LLVM requires that the input code be in SSA form, this is a very nice -property and it is often unclear to newcomers how to generate code for an -imperative language with mutable variables.

- -

The short (and happy) summary of this chapter is that there is no need for -your front-end to build SSA form: LLVM provides highly tuned and well tested -support for this, though the way it works is a bit unexpected for some.

- -
- - -

Why is this a hard problem?

- - -
- -

-To understand why mutable variables cause complexities in SSA construction, -consider this extremely simple C example: -

- -
-
-int G, H;
-int test(_Bool Condition) {
-  int X;
-  if (Condition)
-    X = G;
-  else
-    X = H;
-  return X;
-}
-
-
- -

In this case, we have the variable "X", whose value depends on the path -executed in the program. Because there are two different possible values for X -before the return instruction, a PHI node is inserted to merge the two values. -The LLVM IR that we want for this example looks like this:

- -
-
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.2
-}
-
-
- -

In this example, the loads from the G and H global variables are explicit in -the LLVM IR, and they live in the then/else branches of the if statement -(cond_true/cond_false). In order to merge the incoming values, the X.2 phi node -in the cond_next block selects the right value to use based on where control -flow is coming from: if control flow comes from the cond_false block, X.2 gets -the value of X.1. Alternatively, if control flow comes from cond_true, it gets -the value of X.0. The intent of this chapter is not to explain the details of -SSA form. For more information, see one of the many online -references.

- -

The question for this article is "who places the phi nodes when lowering -assignments to mutable variables?". The issue here is that LLVM -requires that its IR be in SSA form: there is no "non-ssa" mode for it. -However, SSA construction requires non-trivial algorithms and data structures, -so it is inconvenient and wasteful for every front-end to have to reproduce this -logic.

- -
- - -

Memory in LLVM

- - -
- -

The 'trick' here is that while LLVM does require all register values to be -in SSA form, it does not require (or permit) memory objects to be in SSA form. -In the example above, note that the loads from G and H are direct accesses to -G and H: they are not renamed or versioned. This differs from some other -compiler systems, which do try to version memory objects. In LLVM, instead of -encoding dataflow analysis of memory into the LLVM IR, it is handled with Analysis Passes which are computed on -demand.

- -

-With this in mind, the high-level idea is that we want to make a stack variable -(which lives in memory, because it is on the stack) for each mutable object in -a function. To take advantage of this trick, we need to talk about how LLVM -represents stack variables. -

- -

In LLVM, all memory accesses are explicit with load/store instructions, and -it is carefully designed not to have (or need) an "address-of" operator. Notice -how the type of the @G/@H global variables is actually "i32*" even though the -variable is defined as "i32". What this means is that @G defines space -for an i32 in the global data area, but its name actually refers to the -address for that space. Stack variables work the same way, except that instead of -being declared with global variable definitions, they are declared with the -LLVM alloca instruction:

- -
-
-define i32 @example() {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  ...
-  %tmp = load i32* %X       ; load the stack value %X from the stack.
-  %tmp2 = add i32 %tmp, 1   ; increment it
-  store i32 %tmp2, i32* %X  ; store it back
-  ...
-
-
- -

This code shows an example of how you can declare and manipulate a stack -variable in the LLVM IR. Stack memory allocated with the alloca instruction is -fully general: you can pass the address of the stack slot to functions, you can -store it in other variables, etc. In our example above, we could rewrite the -example to use the alloca technique to avoid using a PHI node:

- -
-
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  store i32 %X.0, i32* %X   ; Update X
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  store i32 %X.1, i32* %X   ; Update X
-  br label %cond_next
-
-cond_next:
-  %X.2 = load i32* %X       ; Read X
-  ret i32 %X.2
-}
-
-
- -

With this, we have discovered a way to handle arbitrary mutable variables -without the need to create Phi nodes at all:

- -
    -
  1. Each mutable variable becomes a stack allocation.
  2. Each read of the variable becomes a load from the stack.
  3. Each update of the variable becomes a store to the stack.
  4. Taking the address of a variable just uses the stack address directly.
- -

While this solution has solved our immediate problem, it introduced another -one: we have now apparently introduced a lot of stack traffic for very simple -and common operations, a major performance problem. Fortunately for us, the -LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles -this case, promoting allocas like this into SSA registers, inserting Phi nodes -as appropriate. If you run this example through the pass, for example, you'll -get:

- -
-
-$ llvm-as < example.ll | opt -mem2reg | llvm-dis
-@G = weak global i32 0
-@H = weak global i32 0
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.01
-}
-
-
- -

The mem2reg pass implements the standard "iterated dominance frontier" -algorithm for constructing SSA form and has a number of optimizations that speed -up (very common) degenerate cases. The mem2reg optimization pass is the answer to dealing -with mutable variables, and we highly recommend that you depend on it. Note that -mem2reg only works on variables in certain circumstances:

- -
    -
  1. mem2reg is alloca-driven: it looks for allocas and if it can handle them, it -promotes them. It does not apply to global variables or heap allocations.
  2. - -
  3. mem2reg only looks for alloca instructions in the entry block of the -function. Being in the entry block guarantees that the alloca is only executed -once, which makes analysis simpler.
  4. - -
  5. mem2reg only promotes allocas whose uses are direct loads and stores. If -the address of the stack object is passed to a function, or if any funny pointer -arithmetic is involved, the alloca will not be promoted.
  6. - -
  7. mem2reg only works on allocas of first class -values (such as pointers, scalars and vectors), and only if the array size -of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of -promoting structs or arrays to registers. Note that the "scalarrepl" pass is -more powerful and can promote structs, "unions", and arrays in many cases.
  8. - -
- -

-All of these properties are easy to satisfy for most imperative languages, and -we'll illustrate it below with Kaleidoscope. The final question you may be -asking is: should I bother with this nonsense for my front-end? Wouldn't it be -better if I just did SSA construction directly, avoiding use of the mem2reg -optimization pass? In short, we strongly recommend that you use this technique -for building SSA form, unless there is an extremely good reason not to. Using -this technique is:

- -
    -
  • Proven and well tested: llvm-gcc and clang both use this technique for local -mutable variables. As such, the most common clients of LLVM are using this to -handle a bulk of their variables. You can be sure that bugs are found fast and -fixed early.
  • - -
  • Extremely Fast: mem2reg has a number of special cases that make it fast in -common cases as well as fully general. For example, it has fast-paths for -variables that are only used in a single block, variables that only have one -assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc. -
  • - -
  • Needed for debug info generation: -Debug information in LLVM relies on having the address of the variable -exposed so that debug info can be attached to it. This technique dovetails -very naturally with this style of debug info.
  • -
- -

If nothing else, this makes it much easier to get your front-end up and -running, and is very simple to implement. Let's extend Kaleidoscope with mutable -variables now! -

- -
- - -

Mutable Variables in Kaleidoscope

- - -
- -

Now that we know the sort of problem we want to tackle, let's see what this -looks like in the context of our little Kaleidoscope language. We're going to -add two features:

- -
    -
  1. The ability to mutate variables with the '=' operator.
  2. -
  3. The ability to define new variables.
  4. -
- -

While the first item is really what this is about, we only have variables -for incoming arguments as well as for induction variables, and redefining those only -goes so far :). Also, the ability to define new variables is a -useful thing regardless of whether you will be mutating them. Here's a -motivating example that shows how we could use these:

- -
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-# Recursive fib, we could do this before.
-def fib(x)
-  if (x < 3) then
-    1
-  else
-    fib(x-1)+fib(x-2);
-
-# Iterative fib.
-def fibi(x)
-  var a = 1, b = 1, c in
-  (for i = 3, i < x in 
-     c = a + b :
-     a = b :
-     b = c) :
-  b;
-
-# Call it. 
-fibi(10);
-
-
- -

-In order to mutate variables, we have to change our existing variables to use -the "alloca trick". Once we have that, we'll add our new operator, then extend -Kaleidoscope to support new variable definitions. -

- -
- - -

Adjusting Existing Variables for Mutation

- - -
- -

-The symbol table in Kaleidoscope is managed at code generation time by the -'NamedValues' map. This map currently keeps track of the LLVM "Value*" -that holds the double value for the named variable. In order to support -mutation, we need to change this slightly, so that NamedValues holds -the memory location of the variable in question. Note that this -change is a refactoring: it changes the structure of the code, but does not -(by itself) change the behavior of the compiler. All of these changes are -isolated in the Kaleidoscope code generator.

- -

-At this point in Kaleidoscope's development, it only supports variables for two -things: incoming arguments to functions and the induction variable of 'for' -loops. For consistency, we'll allow mutation of these variables in addition to -other user-defined variables. This means that these will both need memory -locations. -

- -

To start our transformation of Kaleidoscope, we'll change the NamedValues -map so that it maps to AllocaInst* instead of Value*. Once we do this, the C++ -compiler will tell us what parts of the code we need to update:

- -
-
-static std::map<std::string, AllocaInst*> NamedValues;
-
-
- -

Also, since we will need to create these alloca's, we'll use a helper -function that ensures that the allocas are created in the entry block of the -function:

- -
-
-/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
-/// the function.  This is used for mutable variables etc.
-static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
-                                          const std::string &VarName) {
-  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
-                 TheFunction->getEntryBlock().begin());
-  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
-                           VarName.c_str());
-}
-
-
- -

This funny looking code creates an IRBuilder object that is pointing at -the first instruction (.begin()) of the entry block. It then creates an alloca -with the expected name and returns it. Because all values in Kaleidoscope are -doubles, there is no need to pass in a type to use.

- -

With this in place, the first functionality change we want to make is to -variable references. In our new scheme, variables live on the stack, so code -generating a reference to them actually needs to produce a load from the stack -slot:

- -
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  if (V == 0) return ErrorV("Unknown variable name");
-
-  // Load the value.
-  return Builder.CreateLoad(V, Name.c_str());
-}
-
-
- -

As you can see, this is pretty straightforward. Now we need to update the -things that define the variables to set up the alloca. We'll start with -ForExprAST::Codegen (see the full code listing for -the unabridged code):

- -
-
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
-  // Create an alloca for the variable in the entry block.
-  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
-  
-    // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0) return 0;
-  
-  // Store the value into the alloca.
-  Builder.CreateStore(StartVal, Alloca);
-  ...
-
-  // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  // Reload, increment, and restore the alloca.  This handles the case where
-  // the body of the loop mutates the variable.
-  Value *CurVar = Builder.CreateLoad(Alloca);
-  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
-  Builder.CreateStore(NextVar, Alloca);
-  ...
-
-
- -

This code is virtually identical to the code before we allowed mutable variables. The -big difference is that we no longer have to construct a PHI node, and we use -load/store to access the variable as needed.

- -

To support mutable argument variables, we need to also make allocas for them. -The code for this is also pretty simple:

- -
-
-/// CreateArgumentAllocas - Create an alloca for each argument and register the
-/// argument in the symbol table so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
-  Function::arg_iterator AI = F->arg_begin();
-  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
-    // Create an alloca for this variable.
-    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
-
-    // Store the initial value into the alloca.
-    Builder.CreateStore(AI, Alloca);
-
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = Alloca;
-  }
-}
-
-
- -

For each argument, we make an alloca, store the input value to the function -into the alloca, and register the alloca as the memory location for the -argument. This method gets invoked by FunctionAST::Codegen right after -it sets up the entry block for the function.

- -

The final missing piece is adding the mem2reg pass, which allows us to get -good codegen once again:

- -
-
-    // Set up the optimizer pipeline.  Start with registering info about how the
-    // target lays out data structures.
-    OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
-    // Promote allocas to registers.
-    OurFPM.add(createPromoteMemoryToRegisterPass());
-    // Do simple "peephole" optimizations and bit-twiddling optzns.
-    OurFPM.add(createInstructionCombiningPass());
-    // Reassociate expressions.
-    OurFPM.add(createReassociatePass());
-
-
- -

It is interesting to see what the code looks like before and after the -mem2reg optimization runs. For example, this is the before/after code for our -recursive fib function. Before the optimization:

- -
-
-define double @fib(double %x) {
-entry:
-  %x1 = alloca double
-  store double %x, double* %x1
-  %x2 = load double* %x1
-  %cmptmp = fcmp ult double %x2, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:		; preds = %entry
-  br label %ifcont
-
-else:		; preds = %entry
-  %x3 = load double* %x1
-  %subtmp = fsub double %x3, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %x4 = load double* %x1
-  %subtmp5 = fsub double %x4, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:		; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-
-
- -

Here there is only one variable (x, the input argument) but you can still -see the extremely simple-minded code generation strategy we are using. In the -entry block, an alloca is created, and the initial input value is stored into -it. Each reference to the variable does a reload from the stack. Also, note -that we didn't modify the if/then/else expression, so it still inserts a PHI -node. While we could make an alloca for it, it is actually easier to create a -PHI node for it, so we still just make the PHI.

- -

Here is the code after the mem2reg pass runs:

- -
-
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double %x, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:
-  br label %ifcont
-
-else:
-  %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:		; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-
-
- -

This is a trivial case for mem2reg, since there are no redefinitions of the -variable. The point of showing this is to calm your tension about inserting -such blatant inefficiencies :).

- -

After the rest of the optimizers run, we get:

- -
-
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double %x, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp ueq double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %else, label %ifcont
-
-else:
-  %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  ret double %addtmp
-
-ifcont:
-  ret double 1.000000e+00
-}
-
-
- -

Here we see that the simplifycfg pass decided to clone the return instruction -into the end of the 'else' block. This allowed it to eliminate some branches -and the PHI node.

- -

Now that all symbol table references are updated to use stack variables, -we'll add the assignment operator.

- -
- - -

New Assignment Operator

- - -
- -

With our current framework, adding a new assignment operator is really -simple. We will parse it just like any other binary operator, but handle it -internally (instead of allowing the user to define it). The first step is to -set a precedence:

- -
-
- int main() {
-   // Install standard binary operators.
-   // 1 is lowest precedence.
-   BinopPrecedence['='] = 2;
-   BinopPrecedence['<'] = 10;
-   BinopPrecedence['+'] = 20;
-   BinopPrecedence['-'] = 20;
-
-
- -

Now that the parser knows the precedence of the binary operator, it takes -care of all the parsing and AST generation. We just need to implement codegen -for the assignment operator. This looks like:

- -
-
-Value *BinaryExprAST::Codegen() {
-  // Special case '=' because we don't want to emit the LHS as an expression.
-  if (Op == '=') {
-    // Assignment requires the LHS to be an identifier.
-    VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
-    if (!LHSE)
-      return ErrorV("destination of '=' must be a variable");
-
-
- -

Unlike the rest of the binary operators, our assignment operator doesn't -follow the "emit LHS, emit RHS, do computation" model. As such, it is handled -as a special case before the other binary operators are handled. The other -strange thing is that it requires the LHS to be a variable. It is invalid to -have "(x+1) = expr" - only things like "x = expr" are allowed. -

- -
-
-    // Codegen the RHS.
-    Value *Val = RHS->Codegen();
-    if (Val == 0) return 0;
-
-    // Look up the name.
-    Value *Variable = NamedValues[LHSE->getName()];
-    if (Variable == 0) return ErrorV("Unknown variable name");
-
-    Builder.CreateStore(Val, Variable);
-    return Val;
-  }
-  ...  
-
-
- -

Once we have the variable, codegen'ing the assignment is straightforward: -we emit the RHS of the assignment, create a store, and return the computed -value. Returning a value allows for chained assignments like "X = (Y = Z)".

- -

Now that we have an assignment operator, we can mutate loop variables and -arguments. For example, we can now run code like this:

- -
-
-# Function to print a double.
-extern printd(x);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-def test(x)
-  printd(x) :
-  x = 4 :
-  printd(x);
-
-test(123);
-
-
- -

When run, this example prints "123" and then "4", showing that we did -actually mutate the value! Okay, we have now officially implemented our goal: -getting this to work requires SSA construction in the general case. However, -to be really useful, we want the ability to define our own local variables, let's -add this next! -

- -
- - -

User-defined Local Variables

- - -
- -

Adding var/in is just like any of the other extensions we made to -Kaleidoscope: we extend the lexer, the parser, the AST and the code generator. -The first step for adding our new 'var/in' construct is to extend the lexer. -As before, this is pretty trivial, the code looks like this:

- -
-
-enum Token {
-  ...
-  // var definition
-  tok_var = -13
-...
-}
-...
-static int gettok() {
-...
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    if (IdentifierStr == "var") return tok_var;
-    return tok_identifier;
-...
-
-
- -

The next step is to define the AST node that we will construct. For var/in, -it looks like this:

- -
-
-/// VarExprAST - Expression class for var/in
-class VarExprAST : public ExprAST {
-  std::vector<std::pair<std::string, ExprAST*> > VarNames;
-  ExprAST *Body;
-public:
-  VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
-             ExprAST *body)
-  : VarNames(varnames), Body(body) {}
-  
-  virtual Value *Codegen();
-};
-
-
- -

var/in allows a list of names to be defined all at once, and each name can -optionally have an initializer value. As such, we capture this information in -the VarNames vector. Also, var/in has a body, this body is allowed to access -the variables defined by the var/in.

- -

With this in place, we can define the parser pieces. The first thing we do is add -it as a primary expression:

- -
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-///   ::= varexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();
-  case tok_for:        return ParseForExpr();
-  case tok_var:        return ParseVarExpr();
-  }
-}
-
-
- -

Next we define ParseVarExpr:

- -
-
-/// varexpr ::= 'var' identifier ('=' expression)? 
-//                    (',' identifier ('=' expression)?)* 'in' expression
-static ExprAST *ParseVarExpr() {
-  getNextToken();  // eat the var.
-
-  std::vector<std::pair<std::string, ExprAST*> > VarNames;
-
-  // At least one variable name is required.
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after var");
-
-
- -

The first part of this code parses the list of identifier/expr pairs into the -local VarNames vector. - -

-
-  while (1) {
-    std::string Name = IdentifierStr;
-    getNextToken();  // eat identifier.
-
-    // Read the optional initializer.
-    ExprAST *Init = 0;
-    if (CurTok == '=') {
-      getNextToken(); // eat the '='.
-      
-      Init = ParseExpression();
-      if (Init == 0) return 0;
-    }
-    
-    VarNames.push_back(std::make_pair(Name, Init));
-    
-    // End of var list, exit loop.
-    if (CurTok != ',') break;
-    getNextToken(); // eat the ','.
-    
-    if (CurTok != tok_identifier)
-      return Error("expected identifier list after var");
-  }
-
-
- -

Once all the variables are parsed, we then parse the body and create the -AST node:

- -
-
-  // At this point, we have to have 'in'.
-  if (CurTok != tok_in)
-    return Error("expected 'in' keyword after 'var'");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-  
-  return new VarExprAST(VarNames, Body);
-}
-
-
- -

Now that we can parse and represent the code, we need to support emission of -LLVM IR for it. This code starts out with:

- -
-
-Value *VarExprAST::Codegen() {
-  std::vector<AllocaInst *> OldBindings;
-  
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
-  // Register all variables and emit their initializer.
-  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
-    const std::string &VarName = VarNames[i].first;
-    ExprAST *Init = VarNames[i].second;
-
-
- -

Basically it loops over all the variables, installing them one at a time. -For each variable we put into the symbol table, we remember the previous value -that we replace in OldBindings.

- -
-
-    // Emit the initializer before adding the variable to scope, this prevents
-    // the initializer from referencing the variable itself, and permits stuff
-    // like this:
-    //  var a = 1 in
-    //    var a = a in ...   # refers to outer 'a'.
-    Value *InitVal;
-    if (Init) {
-      InitVal = Init->Codegen();
-      if (InitVal == 0) return 0;
-    } else { // If not specified, use 0.0.
-      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
-    }
-    
-    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
-    Builder.CreateStore(InitVal, Alloca);
-
-    // Remember the old variable binding so that we can restore the binding when
-    // we unrecurse.
-    OldBindings.push_back(NamedValues[VarName]);
-    
-    // Remember this binding.
-    NamedValues[VarName] = Alloca;
-  }
-
-
- -

There are more comments here than code. The basic idea is that we emit the -initializer, create the alloca, then update the symbol table to point to it. -Once all the variables are installed in the symbol table, we evaluate the body -of the var/in expression:

- -
-
-  // Codegen the body, now that all vars are in scope.
-  Value *BodyVal = Body->Codegen();
-  if (BodyVal == 0) return 0;
-
-
- -

Finally, before returning, we restore the previous variable bindings:

- -
-
-  // Pop all our variables from scope.
-  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
-    NamedValues[VarNames[i].first] = OldBindings[i];
-
-  // Return the body computation.
-  return BodyVal;
-}
-
-
- -

The end result of all of this is that we get properly scoped variable -definitions, and we even (trivially) allow mutation of them :).

- -

With this, we completed what we set out to do. Our nice iterative fib -example from the intro compiles and runs just fine. The mem2reg pass optimizes -all of our stack variables into SSA registers, inserting PHI nodes where needed, -and our front-end remains simple: no "iterated dominance frontier" computation -anywhere in sight.

- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with mutable -variables and var/in support. To build this example, use: -

- -
-
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-
-
- -

Here is the code:

- -
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5,
-  
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-  tok_for = -9, tok_in = -10,
-  
-  // operators
-  tok_binary = -11, tok_unary = -12,
-  
-  // var definition
-  tok_var = -13
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    if (IdentifierStr == "var") return tok_var;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  const std::string &getName() const { return Name; }
-  virtual Value *Codegen();
-};
-
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
-  char Opcode;
-  ExprAST *Operand;
-public:
-  UnaryExprAST(char opcode, ExprAST *operand) 
-    : Opcode(opcode), Operand(operand) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector<ExprAST*> Args;
-public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-  : Cond(cond), Then(then), Else(_else) {}
-  virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
-  std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
-public:
-  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  virtual Value *Codegen();
-};
-
-/// VarExprAST - Expression class for var/in
-class VarExprAST : public ExprAST {
-  std::vector<std::pair<std::string, ExprAST*> > VarNames;
-  ExprAST *Body;
-public:
-  VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
-             ExprAST *body)
-  : VarNames(varnames), Body(body) {}
-  
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes), as well as if it is an operator.
-class PrototypeAST {
-  std::string Name;
-  std::vector<std::string> Args;
-  bool isOperator;
-  unsigned Precedence;  // Precedence if a binary op.
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
-               bool isoperator = false, unsigned prec = 0)
-  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-  
-  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
-  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
-  
-  char getOperatorName() const {
-    assert(isUnaryOp() || isBinaryOp());
-    return Name[Name.size()-1];
-  }
-  
-  unsigned getBinaryPrecedence() const { return Precedence; }
-  
-  Function *Codegen();
-  
-  void CreateArgumentAllocas(Function *F);
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map<char, int> BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec <= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector<ExprAST*> Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-///
-/// All three sub-expressions are mandatory.  Returns null after reporting an
-/// error.
-static ExprAST *ParseIfExpr() {
-  getNextToken();  // consume 'if'.
-
-  ExprAST *CondExpr = ParseExpression();
-  if (CondExpr == 0)
-    return 0;
-
-  if (CurTok != tok_then)
-    return Error("expected then");
-  getNextToken();  // consume 'then'.
-
-  ExprAST *ThenExpr = ParseExpression();
-  if (!ThenExpr)
-    return 0;
-
-  if (CurTok != tok_else)
-    return Error("expected else");
-  getNextToken();  // consume 'else'.
-
-  ExprAST *ElseExpr = ParseExpression();
-  if (ElseExpr == 0)
-    return 0;
-
-  return new IfExprAST(CondExpr, ThenExpr, ElseExpr);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-///
-/// The third (step) expression is optional; a null Step is passed to the AST
-/// node when it is absent.
-static ExprAST *ParseForExpr() {
-  getNextToken();  // consume 'for'.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");
-  const std::string LoopVar = IdentifierStr;
-  getNextToken();  // consume the identifier.
-
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // consume '='.
-
-  ExprAST *StartExpr = ParseExpression();
-  if (!StartExpr)
-    return 0;
-
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();  // consume ','.
-
-  ExprAST *EndExpr = ParseExpression();
-  if (!EndExpr)
-    return 0;
-
-  // Optional step: only present when another ',' follows the end expression.
-  ExprAST *StepExpr = 0;
-  if (CurTok == ',') {
-    getNextToken();
-    StepExpr = ParseExpression();
-    if (!StepExpr)
-      return 0;
-  }
-
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // consume 'in'.
-
-  ExprAST *BodyExpr = ParseExpression();
-  if (!BodyExpr)
-    return 0;
-
-  return new ForExprAST(LoopVar, StartExpr, EndExpr, StepExpr, BodyExpr);
-}
-
-/// varexpr ::= 'var' identifier ('=' expression)?
-///                    (',' identifier ('=' expression)?)* 'in' expression
-///
-/// Each declared name is paired with its initializer expression, or with null
-/// when no initializer was written.
-static ExprAST *ParseVarExpr() {
-  getNextToken();  // consume 'var'.
-
-  // The list must start with at least one identifier.
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after var");
-
-  std::vector<std::pair<std::string, ExprAST*> > Decls;
-  bool MoreNames = true;
-  while (MoreNames) {
-    const std::string Name = IdentifierStr;
-    getNextToken();  // consume the identifier.
-
-    // Optional "= expression" initializer.
-    ExprAST *Init = 0;
-    if (CurTok == '=') {
-      getNextToken();  // consume '='.
-      Init = ParseExpression();
-      if (!Init)
-        return 0;
-    }
-    Decls.push_back(std::make_pair(Name, Init));
-
-    // A ',' means another declaration follows; anything else ends the list.
-    MoreNames = (CurTok == ',');
-    if (MoreNames) {
-      getNextToken();  // consume ','.
-      if (CurTok != tok_identifier)
-        return Error("expected identifier list after var");
-    }
-  }
-
-  // The declaration list must be followed by 'in'.
-  if (CurTok != tok_in)
-    return Error("expected 'in' keyword after 'var'");
-  getNextToken();  // consume 'in'.
-
-  ExprAST *BodyExpr = ParseExpression();
-  if (!BodyExpr)
-    return 0;
-
-  return new VarExprAST(Decls, BodyExpr);
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-///   ::= varexpr
-///
-/// Dispatches on the current token to the matching sub-parser.
-static ExprAST *ParsePrimary() {
-  if (CurTok == tok_identifier) return ParseIdentifierExpr();
-  if (CurTok == tok_number)     return ParseNumberExpr();
-  if (CurTok == '(')            return ParseParenExpr();
-  if (CurTok == tok_if)         return ParseIfExpr();
-  if (CurTok == tok_for)        return ParseForExpr();
-  if (CurTok == tok_var)        return ParseVarExpr();
-
-  // No primary expression can start with this token.
-  return Error("unknown token when expecting an expression");
-}
-
-/// unary
-///   ::= primary
-///   ::= '!' unary
-///
-/// Any ASCII token other than '(' and ',' is taken to be a unary operator
-/// applied to the unary expression that follows it.
-static ExprAST *ParseUnary() {
-  const bool LooksLikeOperator =
-      isascii(CurTok) && CurTok != '(' && CurTok != ',';
-  if (!LooksLikeOperator)
-    return ParsePrimary();
-
-  // Remember the operator character, then parse its operand recursively.
-  const int OpChar = CurTok;
-  getNextToken();
-  ExprAST *Operand = ParseUnary();
-  if (Operand == 0)
-    return 0;
-  return new UnaryExprAST(OpChar, Operand);
-}
-
-/// binoprhs
-///   ::= ('+' unary)*
-///
-/// Operator-precedence parsing: keeps folding "op unary" pairs into LHS for as
-/// long as the pending operator's precedence is at least ExprPrec.
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  for (;;) {
-    const int CurPrec = GetTokPrecedence();
-
-    // The pending token binds less tightly than required (or is not a binop
-    // at all): the expression built so far is complete.
-    if (CurPrec < ExprPrec)
-      return LHS;
-
-    const int OpTok = CurTok;
-    getNextToken();  // consume the operator.
-
-    // Operand immediately to the right of the operator.
-    ExprAST *Right = ParseUnary();
-    if (Right == 0)
-      return 0;
-
-    // If the operator after Right binds tighter than this one, it gets Right
-    // as its left operand first.
-    if (CurPrec < GetTokPrecedence()) {
-      Right = ParseBinOpRHS(CurPrec + 1, Right);
-      if (!Right)
-        return 0;
-    }
-
-    // Fold the pair into the running left-hand side.
-    LHS = new BinaryExprAST(OpTok, LHS, Right);
-  }
-}
-
-/// expression
-///   ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
-  // Parse the leading operand, then let ParseBinOpRHS absorb any operators.
-  ExprAST *First = ParseUnary();
-  if (First == 0)
-    return 0;
-  return ParseBinOpRHS(0, First);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-///   ::= binary LETTER number? (id, id)
-///   ::= unary LETTER (id)
-///
-/// Parses a function prototype starting at the current token.  Operator
-/// prototypes are named "unary<c>" / "binary<c>", where <c> is the operator
-/// character.  A binary operator may be followed by a precedence in the range
-/// 1..100; 30 is used when none is given.  Returns null after reporting an
-/// error.
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-
-  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;
-
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  case tok_unary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected unary operator");
-    FnName = "unary";
-    FnName += (char)CurTok;
-    Kind = 1;
-    getNextToken();
-    break;
-  case tok_binary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected binary operator");
-    FnName = "binary";
-    FnName += (char)CurTok;
-    Kind = 2;
-    getNextToken();
-
-    // Read the precedence if present.
-    if (CurTok == tok_number) {
-      if (NumVal < 1 || NumVal > 100)
-        return ErrorP("Invalid precedence: must be 1..100");
-      BinaryPrecedence = (unsigned)NumVal;
-      getNextToken();
-    }
-    break;
-  }
-
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-
-  // Collect argument names; the first getNextToken() call also consumes '('.
-  std::vector<std::string> ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-
-  // success.
-  getNextToken();  // eat ')'.
-
-  // Kind doubles as the required operand count: 1 for unary, 2 for binary.
-  if (Kind && ArgNames.size() != Kind)
-    return ErrorP("Invalid number of operands for operator");
-
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // consume 'def'.
-
-  PrototypeAST *Signature = ParsePrototype();
-  if (!Signature)
-    return 0;
-
-  // The function body is a single expression.
-  ExprAST *BodyExpr = ParseExpression();
-  if (!BodyExpr)
-    return 0;
-  return new FunctionAST(Signature, BodyExpr);
-}
-
-/// toplevelexpr ::= expression
-///
-/// The expression is wrapped in a function with an empty name and no arguments.
-static FunctionAST *ParseTopLevelExpr() {
-  ExprAST *BodyExpr = ParseExpression();
-  if (BodyExpr == 0)
-    return 0;
-
-  // Anonymous, zero-argument prototype for the wrapper function.
-  PrototypeAST *AnonProto = new PrototypeAST("", std::vector<std::string>());
-  return new FunctionAST(AnonProto, BodyExpr);
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // consume 'extern'.
-  PrototypeAST *Declared = ParsePrototype();
-  return Declared;
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder<> Builder(getGlobalContext());
-static std::map<std::string, AllocaInst*> NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-/// CreateEntryBlockAlloca - Emit an alloca of one double, named VarName, at the
-/// very start of TheFunction's entry block.  Mutable variables use these slots.
-static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
-                                          const std::string &VarName) {
-  // A temporary builder positioned at the first instruction of the entry block,
-  // so the global Builder's insertion point is left untouched.
-  BasicBlock &Entry = TheFunction->getEntryBlock();
-  IRBuilder<> EntryBuilder(&Entry, Entry.begin());
-
-  Type *DoubleTy = Type::getDoubleTy(getGlobalContext());
-  return EntryBuilder.CreateAlloca(DoubleTy, 0, VarName.c_str());
-}
-
-/// Emit the numeric literal as a floating-point constant.
-Value *NumberExprAST::Codegen() {
-  APFloat Literal(Val);
-  return ConstantFP::get(getGlobalContext(), Literal);
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  if (V == 0) return ErrorV("Unknown variable name");
-
-  // Load the value.
-  return Builder.CreateLoad(V, Name.c_str());
-}
-
-/// Emit the operand, then a call to the module function named "unary" followed
-/// by the operator character.
-Value *UnaryExprAST::Codegen() {
-  Value *Arg = Operand->Codegen();
-  if (!Arg)
-    return 0;
-
-  // Unary operators have no built-in lowering; each one must exist as a
-  // function in the module.
-  Function *OpFn = TheModule->getFunction(std::string("unary")+Opcode);
-  if (!OpFn)
-    return ErrorV("Unknown unary operator");
-
-  return Builder.CreateCall(OpFn, Arg, "unop");
-}
-
-/// Emit code for a binary expression: assignment, one of the built-in
-/// operators ('+', '-', '*', '<'), or a call to a "binary<op>" function.
-Value *BinaryExprAST::Codegen() {
-  // Assignment is handled up front: its left side names a storage slot and
-  // must not be evaluated as an ordinary expression.
-  if (Op == '=') {
-    VariableExprAST *Target = dynamic_cast<VariableExprAST*>(LHS);
-    if (Target == 0)
-      return ErrorV("destination of '=' must be a variable");
-
-    // Evaluate the value being assigned.
-    Value *NewVal = RHS->Codegen();
-    if (!NewVal) return 0;
-
-    // Find the slot registered for the destination name.
-    Value *Slot = NamedValues[Target->getName()];
-    if (!Slot) return ErrorV("Unknown variable name");
-
-    // The assignment expression evaluates to the stored value.
-    Builder.CreateStore(NewVal, Slot);
-    return NewVal;
-  }
-
-  // Both operands are emitted, left first, before either is checked.
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (!L || !R) return 0;
-
-  if (Op == '+') return Builder.CreateFAdd(L, R, "addtmp");
-  if (Op == '-') return Builder.CreateFSub(L, R, "subtmp");
-  if (Op == '*') return Builder.CreateFMul(L, R, "multmp");
-  if (Op == '<') {
-    Value *Cmp = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Widen the comparison result (0 or 1) to a double 0.0 or 1.0.
-    return Builder.CreateUIToFP(Cmp, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  }
-
-  // Not a built-in, so it has to be a user-defined operator: call the module
-  // function named "binary" followed by the operator character.
-  Function *OpFn = TheModule->getFunction(std::string("binary")+Op);
-  assert(OpFn && "binary operator not found!");
-
-  Value *CallArgs[2] = { L, R };
-  return Builder.CreateCall(OpFn, CallArgs, "binop");
-}
-
-/// Emit a call to the module function named Callee, after checking that it
-/// exists and takes as many arguments as were supplied.
-Value *CallExprAST::Codegen() {
-  Function *Target = TheModule->getFunction(Callee);
-  if (!Target)
-    return ErrorV("Unknown function referenced");
-
-  if (Target->arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  // Emit the arguments left to right, giving up on the first failure.
-  std::vector<Value*> ArgValues;
-  for (unsigned Idx = 0; Idx != Args.size(); ++Idx) {
-    Value *ArgV = Args[Idx]->Codegen();
-    if (ArgV == 0)
-      return 0;
-    ArgValues.push_back(ArgV);
-  }
-
-  return Builder.CreateCall(Target, ArgValues, "calltmp");
-}
-
-/// Emit an if/then/else expression as a conditional branch into "then" and
-/// "else" blocks that both jump to "ifcont", where a PHI selects the result.
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (!CondV) return 0;
-
-  // The condition is true when the value compares not-equal to 0.0.
-  Value *Zero = ConstantFP::get(getGlobalContext(), APFloat(0.0));
-  CondV = Builder.CreateFCmpONE(CondV, Zero, "ifcond");
-
-  Function *ParentFn = Builder.GetInsertBlock()->getParent();
-
-  // Only "then" is attached to the function right away; "else" and "ifcont"
-  // are appended further down, once the code before them has been emitted.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", ParentFn);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-
-  // --- then ---
-  Builder.SetInsertPoint(ThenBB);
-  Value *ThenV = Then->Codegen();
-  if (!ThenV) return 0;
-  Builder.CreateBr(MergeBB);
-  // Emitting 'Then' may have moved the insertion point to another block; the
-  // PHI needs the block that actually branches to the merge point.
-  ThenBB = Builder.GetInsertBlock();
-
-  // --- else ---
-  ParentFn->getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  Value *ElseV = Else->Codegen();
-  if (!ElseV) return 0;
-  Builder.CreateBr(MergeBB);
-  // Same adjustment as for the 'then' side.
-  ElseBB = Builder.GetInsertBlock();
-
-  // --- merge ---
-  ParentFn->getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *Result = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                      "iftmp");
-  Result->addIncoming(ThenV, ThenBB);
-  Result->addIncoming(ElseV, ElseBB);
-  return Result;
-}
-
-/// Emit a 'for' loop whose induction variable lives in a stack slot.  The
-/// expression itself always evaluates to 0.0.
-Value *ForExprAST::Codegen() {
-  // Shape of the emitted code:
-  //     var = alloca double        (in the entry block)
-  //     ...
-  //     start = startexpr
-  //     store start -> var
-  //     br loop
-  //   loop:
-  //     bodyexpr
-  //     step = stepexpr
-  //     endcond = endexpr
-  //     curvar = load var
-  //     nextvar = curvar + step
-  //     store nextvar -> var
-  //     br endcond, loop, afterloop
-  //   afterloop:
-
-  Function *ParentFn = Builder.GetInsertBlock()->getParent();
-
-  // Stack slot for the induction variable.
-  AllocaInst *Slot = CreateEntryBlockAlloca(ParentFn, VarName);
-
-  // The start expression is emitted before the induction variable is in scope.
-  Value *StartVal = Start->Codegen();
-  if (!StartVal) return 0;
-  Builder.CreateStore(StartVal, Slot);
-
-  // Create the loop block, branch into it explicitly, and continue there.
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", ParentFn);
-  Builder.CreateBr(LoopBB);
-  Builder.SetInsertPoint(LoopBB);
-
-  // Bind the name to the slot for the duration of the loop, remembering any
-  // binding it shadows so it can be put back afterwards.
-  AllocaInst *Shadowed = NamedValues[VarName];
-  NamedValues[VarName] = Slot;
-
-  // The body's value is discarded, but a failure inside it is still fatal.
-  // Emitting it may move the insertion point to a different block.
-  if (!Body->Codegen())
-    return 0;
-
-  // Step defaults to 1.0 when the source did not give one.
-  Value *StepVal;
-  if (Step == 0) {
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  } else {
-    StepVal = Step->Codegen();
-    if (!StepVal) return 0;
-  }
-
-  // The end condition is evaluated before the variable is incremented.
-  Value *EndCond = End->Codegen();
-  if (!EndCond) return 0;
-
-  // Reload from the slot instead of reusing an earlier value: the body may
-  // have assigned to the variable.
-  Value *CurVar = Builder.CreateLoad(Slot, VarName.c_str());
-  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
-  Builder.CreateStore(NextVar, Slot);
-
-  // Keep looping while the end condition compares not-equal to 0.0.
-  Value *Zero = ConstantFP::get(getGlobalContext(), APFloat(0.0));
-  EndCond = Builder.CreateFCmpONE(EndCond, Zero, "loopcond");
-
-  // Block that receives control once the loop exits.
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", ParentFn);
-
-  // Back edge or exit, emitted at the current insertion point.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-
-  // Everything emitted from here on goes after the loop.
-  Builder.SetInsertPoint(AfterBB);
-
-  // Put back the shadowed binding, or drop the name if there was none.
-  if (Shadowed == 0)
-    NamedValues.erase(VarName);
-  else
-    NamedValues[VarName] = Shadowed;
-
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-/// Emit a 'var ... in body' expression: allocate and initialize each declared
-/// variable, emit the body with the variables in scope, then restore the
-/// bindings they shadowed.  Returns the body's value, or null on error.
-Value *VarExprAST::Codegen() {
-  std::vector<AllocaInst *> OldBindings;
-
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
-  // Register all variables and emit their initializer.
-  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
-    const std::string &VarName = VarNames[i].first;
-    ExprAST *Init = VarNames[i].second;
-
-    // Emit the initializer before adding the variable to scope, this prevents
-    // the initializer from referencing the variable itself, and permits stuff
-    // like this:
-    //  var a = 1 in
-    //    var a = a in ...   # refers to outer 'a'.
-    Value *InitVal;
-    if (Init) {
-      InitVal = Init->Codegen();
-      if (InitVal == 0) return 0;
-    } else { // If not specified, use 0.0.
-      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
-    }
-
-    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
-    Builder.CreateStore(InitVal, Alloca);
-
-    // Remember the old variable binding so that we can restore the binding when
-    // we unrecurse.  A null entry means the name was previously unbound.
-    OldBindings.push_back(NamedValues[VarName]);
-
-    // Remember this binding.
-    NamedValues[VarName] = Alloca;
-  }
-
-  // Codegen the body, now that all vars are in scope.
-  Value *BodyVal = Body->Codegen();
-  if (BodyVal == 0) return 0;
-
-  // Pop all our variables from scope, newest first.  Restoring in reverse
-  // matters when a name is listed more than once ("var a = 1, a = 2 in ..."):
-  // the saved binding for the second 'a' is the first 'a''s alloca, so a
-  // forward walk would leave that inner alloca bound after this expression.
-  for (unsigned i = VarNames.size(); i != 0; --i)
-    NamedValues[VarNames[i-1].first] = OldBindings[i-1];
-
-  // Return the body computation.
-  return BodyVal;
-}
-
-/// Create (or reuse) the module function described by this prototype and name
-/// its arguments.  Returns null after reporting a conflicting redefinition.
-Function *PrototypeAST::Codegen() {
-  // Every parameter and the result are doubles: double(double, ..., double).
-  Type *DoubleTy = Type::getDoubleTy(getGlobalContext());
-  std::vector<Type*> ParamTys(Args.size(), DoubleTy);
-  FunctionType *FnTy = FunctionType::get(DoubleTy, ParamTys, false);
-
-  Function *F = Function::Create(FnTy, Function::ExternalLinkage, Name, TheModule);
-
-  // If the new function did not end up with the requested name, something
-  // called 'Name' already existed in the module.
-  if (F->getName() != Name) {
-    // Drop the function just created and work with the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-
-    // An existing function that already has a body may not be redefined or
-    // re-declared.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-
-    // An existing declaration must agree on the number of arguments.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-
-  // Give each IR argument the name written in the prototype.
-  Function::arg_iterator ArgIt = F->arg_begin();
-  for (unsigned Pos = 0; Pos != Args.size(); ++Pos, ++ArgIt)
-    ArgIt->setName(Args[Pos]);
-
-  return F;
-}
-
-/// CreateArgumentAllocas - Give every argument of F its own alloca, store the
-/// incoming value into it, and bind the argument name to that alloca in
-/// NamedValues so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
-  Function::arg_iterator ArgIt = F->arg_begin();
-  for (unsigned Pos = 0; Pos != Args.size(); ++Pos, ++ArgIt) {
-    const std::string &ArgName = Args[Pos];
-
-    // Slot for this argument, initialized with the value passed in.
-    AllocaInst *Slot = CreateEntryBlockAlloca(F, ArgName);
-    Builder.CreateStore(ArgIt, Slot);
-
-    // Make the argument visible to variable lookups.
-    NamedValues[ArgName] = Slot;
-  }
-}
-
-/// Emit a complete function: prototype, entry block, argument slots and body.
-/// On success the function is verified and run through TheFPM; if the body
-/// fails, the function is erased from the module and null is returned.
-Function *FunctionAST::Codegen() {
-  // Each function starts with an empty variable table.
-  NamedValues.clear();
-
-  Function *Fn = Proto->Codegen();
-  if (!Fn)
-    return 0;
-
-  // A user-defined binary operator becomes parseable from here on.
-  if (Proto->isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
-
-  // Start emitting into a fresh "entry" block.
-  BasicBlock *EntryBB = BasicBlock::Create(getGlobalContext(), "entry", Fn);
-  Builder.SetInsertPoint(EntryBB);
-
-  // Create the argument allocas and register the argument names.
-  Proto->CreateArgumentAllocas(Fn);
-
-  Value *RetVal = Body->Codegen();
-  if (RetVal == 0) {
-    // The body could not be emitted: remove the function and withdraw the
-    // operator precedence that was installed above.
-    Fn->eraseFromParent();
-    if (Proto->isBinaryOp())
-      BinopPrecedence.erase(Proto->getOperatorName());
-    return 0;
-  }
-
-  // Return the body's value, then validate and optimize the function.
-  Builder.CreateRet(RetVal);
-  verifyFunction(*Fn);
-  TheFPM->run(*Fn);
-  return Fn;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-/// Parse a 'def', emit code for it, and dump the resulting IR on success.
-static void HandleDefinition() {
-  FunctionAST *Def = ParseDefinition();
-  if (Def == 0) {
-    // Parse error: skip one token so the main loop can make progress.
-    getNextToken();
-    return;
-  }
-
-  Function *IR = Def->Codegen();
-  if (IR != 0) {
-    fprintf(stderr, "Read function definition:");
-    IR->dump();
-  }
-}
-
-/// Parse an 'extern' declaration, emit it, and dump the resulting IR on success.
-static void HandleExtern() {
-  PrototypeAST *Decl = ParseExtern();
-  if (Decl == 0) {
-    // Parse error: skip one token so the main loop can make progress.
-    getNextToken();
-    return;
-  }
-
-  Function *IR = Decl->Codegen();
-  if (IR != 0) {
-    fprintf(stderr, "Read extern: ");
-    IR->dump();
-  }
-}
-
-/// Parse a top-level expression as an anonymous function, JIT it, call it, and
-/// print the double it returns.
-static void HandleTopLevelExpression() {
-  FunctionAST *Anon = ParseTopLevelExpr();
-  if (Anon == 0) {
-    // Parse error: skip one token so the main loop can make progress.
-    getNextToken();
-    return;
-  }
-
-  Function *IR = Anon->Codegen();
-  if (IR == 0)
-    return;
-
-  // Ask the execution engine for the compiled function's address.
-  void *FPtr = TheExecutionEngine->getPointerToFunction(IR);
-
-  // The anonymous function takes no arguments and returns a double; cast the
-  // address accordingly so it can be called like a native function.
-  typedef double (*EntryPoint)();
-  EntryPoint Run = (EntryPoint)(intptr_t)FPtr;
-  fprintf(stderr, "Evaluated to %f\n", Run());
-}
-
-/// top ::= definition | external | expression | ';'
-///
-/// Prints a prompt, then dispatches on the current token until end of input.
-static void MainLoop() {
-  for (;;) {
-    fprintf(stderr, "ready> ");
-
-    if (CurTok == tok_eof)
-      return;
-
-    if (CurTok == ';')
-      getNextToken();  // top-level semicolons are ignored.
-    else if (CurTok == tok_def)
-      HandleDefinition();
-    else if (CurTok == tok_extern)
-      HandleExtern();
-    else
-      HandleTopLevelExpression();
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - Write X, converted to a char, with putchar; always returns 0.
-extern "C"
-double putchard(double X) {
-  const char Ch = (char)X;
-  putchar(Ch);
-  return 0;
-}
-
-/// printd - Print X with printf using the format "%f\n"; always returns 0.
-extern "C"
-double printd(double X) {
-  const double Shown = X;
-  printf("%f\n", Shown);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-/// Program entry point: installs the built-in operators, creates the module,
-/// execution engine and optimization pipeline, runs the interactive loop, and
-/// finally dumps the module.
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &Ctx = getGlobalContext();
-
-  // Built-in binary operators.  A larger number binds more tightly; 1 is the
-  // lowest precedence.
-  BinopPrecedence['='] = 2;
-  BinopPrecedence['<'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // tightest of the built-ins.
-
-  // Show the first prompt and read the first token.
-  fprintf(stderr, "ready> ");
-  getNextToken();
-
-  // The module holds all of the generated code.
-  TheModule = new Module("my cool jit", Ctx);
-
-  // Create the JIT, which takes ownership of the module.
-  std::string EngineError;
-  TheExecutionEngine =
-      EngineBuilder(TheModule).setErrorStr(&EngineError).create();
-  if (TheExecutionEngine == 0) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n",
-            EngineError.c_str());
-    exit(1);
-  }
-
-  // Per-function optimization pipeline.  Passes are added in the order listed.
-  FunctionPassManager Passes(TheModule);
-  // Information about how the target lays out data structures.
-  Passes.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
-  // Basic alias analysis, for GVN.
-  Passes.add(createBasicAliasAnalysisPass());
-  // Promote allocas to registers.
-  Passes.add(createPromoteMemoryToRegisterPass());
-  // Simple "peephole" and bit-twiddling optimizations.
-  Passes.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  Passes.add(createReassociatePass());
-  // Eliminate common subexpressions.
-  Passes.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  Passes.add(createCFGSimplificationPass());
-
-  Passes.doInitialization();
-
-  // Publish the pipeline through the global used by code generation.
-  TheFPM = &Passes;
-
-  // Run the interactive loop until end of input.
-  MainLoop();
-
-  // Clear the global before the pass manager it points to goes out of scope.
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
-  return 0;
-}
-
-
- -Next: Conclusion and other useful LLVM tidbits -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/LangImpl7.rst b/docs/tutorial/LangImpl7.rst new file mode 100644 index 000000000000..6dde2fe41d1a --- /dev/null +++ b/docs/tutorial/LangImpl7.rst @@ -0,0 +1,2003 @@ +======================================================= +Kaleidoscope: Extending the Language: Mutable Variables +======================================================= + +.. contents:: + :local: + +Chapter 7 Introduction +====================== + +Welcome to Chapter 7 of the "`Implementing a language with +LLVM `_" tutorial. In chapters 1 through 6, we've built a +very respectable, albeit simple, `functional programming +language `_. In our +journey, we learned some parsing techniques, how to build and represent +an AST, how to build LLVM IR, and how to optimize the resultant code as +well as JIT compile it. + +While Kaleidoscope is interesting as a functional language, the fact +that it is functional makes it "too easy" to generate LLVM IR for it. In +particular, a functional language makes it very easy to build LLVM IR +directly in `SSA +form `_. +Since LLVM requires that the input code be in SSA form, this is a very +nice property and it is often unclear to newcomers how to generate code +for an imperative language with mutable variables. + +The short (and happy) summary of this chapter is that there is no need +for your front-end to build SSA form: LLVM provides highly tuned and +well tested support for this, though the way it works is a bit +unexpected for some. + +Why is this a hard problem? +=========================== + +To understand why mutable variables cause complexities in SSA +construction, consider this extremely simple C example: + +.. code-block:: c + + int G, H; + int test(_Bool Condition) { + int X; + if (Condition) + X = G; + else + X = H; + return X; + } + +In this case, we have the variable "X", whose value depends on the path +executed in the program. 
Because there are two different possible values +for X before the return instruction, a PHI node is inserted to merge the +two values. The LLVM IR that we want for this example looks like this: + +.. code-block:: llvm + + @G = weak global i32 0 ; type of @G is i32* + @H = weak global i32 0 ; type of @H is i32* + + define i32 @test(i1 %Condition) { + entry: + br i1 %Condition, label %cond_true, label %cond_false + + cond_true: + %X.0 = load i32* @G + br label %cond_next + + cond_false: + %X.1 = load i32* @H + br label %cond_next + + cond_next: + %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ] + ret i32 %X.2 + } + +In this example, the loads from the G and H global variables are +explicit in the LLVM IR, and they live in the then/else branches of the +if statement (cond\_true/cond\_false). In order to merge the incoming +values, the X.2 phi node in the cond\_next block selects the right value +to use based on where control flow is coming from: if control flow comes +from the cond\_false block, X.2 gets the value of X.1. Alternatively, if +control flow comes from cond\_true, it gets the value of X.0. The intent +of this chapter is not to explain the details of SSA form. For more +information, see one of the many `online +references `_. + +The question for this article is "who places the phi nodes when lowering +assignments to mutable variables?". The issue here is that LLVM +*requires* that its IR be in SSA form: there is no "non-ssa" mode for +it. However, SSA construction requires non-trivial algorithms and data +structures, so it is inconvenient and wasteful for every front-end to +have to reproduce this logic. + +Memory in LLVM +============== + +The 'trick' here is that while LLVM does require all register values to +be in SSA form, it does not require (or permit) memory objects to be in +SSA form. In the example above, note that the loads from G and H are +direct accesses to G and H: they are not renamed or versioned. 
This +differs from some other compiler systems, which do try to version memory +objects. In LLVM, instead of encoding dataflow analysis of memory into +the LLVM IR, it is handled with `Analysis +Passes <../WritingAnLLVMPass.html>`_ which are computed on demand. + +With this in mind, the high-level idea is that we want to make a stack +variable (which lives in memory, because it is on the stack) for each +mutable object in a function. To take advantage of this trick, we need +to talk about how LLVM represents stack variables. + +In LLVM, all memory accesses are explicit with load/store instructions, +and it is carefully designed not to have (or need) an "address-of" +operator. Notice how the type of the @G/@H global variables is actually +"i32\*" even though the variable is defined as "i32". What this means is +that @G defines *space* for an i32 in the global data area, but its +*name* actually refers to the address for that space. Stack variables +work the same way, except that instead of being declared with global +variable definitions, they are declared with the `LLVM alloca +instruction <../LangRef.html#i_alloca>`_: + +.. code-block:: llvm + + define i32 @example() { + entry: + %X = alloca i32 ; type of %X is i32*. + ... + %tmp = load i32* %X ; load the stack value %X from the stack. + %tmp2 = add i32 %tmp, 1 ; increment it + store i32 %tmp2, i32* %X ; store it back + ... + +This code shows an example of how you can declare and manipulate a stack +variable in the LLVM IR. Stack memory allocated with the alloca +instruction is fully general: you can pass the address of the stack slot +to functions, you can store it in other variables, etc. In our example +above, we could rewrite the example to use the alloca technique to avoid +using a PHI node: + +.. code-block:: llvm + + @G = weak global i32 0 ; type of @G is i32* + @H = weak global i32 0 ; type of @H is i32* + + define i32 @test(i1 %Condition) { + entry: + %X = alloca i32 ; type of %X is i32*. 
+ br i1 %Condition, label %cond_true, label %cond_false + + cond_true: + %X.0 = load i32* @G + store i32 %X.0, i32* %X ; Update X + br label %cond_next + + cond_false: + %X.1 = load i32* @H + store i32 %X.1, i32* %X ; Update X + br label %cond_next + + cond_next: + %X.2 = load i32* %X ; Read X + ret i32 %X.2 + } + +With this, we have discovered a way to handle arbitrary mutable +variables without the need to create Phi nodes at all: + +#. Each mutable variable becomes a stack allocation. +#. Each read of the variable becomes a load from the stack. +#. Each update of the variable becomes a store to the stack. +#. Taking the address of a variable just uses the stack address + directly. + +While this solution has solved our immediate problem, it introduced +another one: we have now apparently introduced a lot of stack traffic +for very simple and common operations, a major performance problem. +Fortunately for us, the LLVM optimizer has a highly-tuned optimization +pass named "mem2reg" that handles this case, promoting allocas like this +into SSA registers, inserting Phi nodes as appropriate. If you run this +example through the pass, for example, you'll get: + +.. code-block:: bash + + $ llvm-as < example.ll | opt -mem2reg | llvm-dis + @G = weak global i32 0 + @H = weak global i32 0 + + define i32 @test(i1 %Condition) { + entry: + br i1 %Condition, label %cond_true, label %cond_false + + cond_true: + %X.0 = load i32* @G + br label %cond_next + + cond_false: + %X.1 = load i32* @H + br label %cond_next + + cond_next: + %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ] + ret i32 %X.01 + } + +The mem2reg pass implements the standard "iterated dominance frontier" +algorithm for constructing SSA form and has a number of optimizations +that speed up (very common) degenerate cases. The mem2reg optimization +pass is the answer to dealing with mutable variables, and we highly +recommend that you depend on it. 
Note that mem2reg only works on +variables in certain circumstances: + +#. mem2reg is alloca-driven: it looks for allocas and if it can handle + them, it promotes them. It does not apply to global variables or heap + allocations. +#. mem2reg only looks for alloca instructions in the entry block of the + function. Being in the entry block guarantees that the alloca is only + executed once, which makes analysis simpler. +#. mem2reg only promotes allocas whose uses are direct loads and stores. + If the address of the stack object is passed to a function, or if any + funny pointer arithmetic is involved, the alloca will not be + promoted. +#. mem2reg only works on allocas of `first + class <../LangRef.html#t_classifications>`_ values (such as pointers, + scalars and vectors), and only if the array size of the allocation is + 1 (or missing in the .ll file). mem2reg is not capable of promoting + structs or arrays to registers. Note that the "scalarrepl" pass is + more powerful and can promote structs, "unions", and arrays in many + cases. + +All of these properties are easy to satisfy for most imperative +languages, and we'll illustrate it below with Kaleidoscope. The final +question you may be asking is: should I bother with this nonsense for my +front-end? Wouldn't it be better if I just did SSA construction +directly, avoiding use of the mem2reg optimization pass? In short, we +strongly recommend that you use this technique for building SSA form, +unless there is an extremely good reason not to. Using this technique +is: + +- Proven and well tested: llvm-gcc and clang both use this technique + for local mutable variables. As such, the most common clients of LLVM + are using this to handle a bulk of their variables. You can be sure + that bugs are found fast and fixed early. +- Extremely Fast: mem2reg has a number of special cases that make it + fast in common cases as well as fully general. 
For example, it has + fast-paths for variables that are only used in a single block, + variables that only have one assignment point, good heuristics to + avoid insertion of unneeded phi nodes, etc. +- Needed for debug info generation: `Debug information in + LLVM <../SourceLevelDebugging.html>`_ relies on having the address of + the variable exposed so that debug info can be attached to it. This + technique dovetails very naturally with this style of debug info. + +If nothing else, this makes it much easier to get your front-end up and +running, and is very simple to implement. Let's extend Kaleidoscope with +mutable variables now! + +Mutable Variables in Kaleidoscope +================================= + +Now that we know the sort of problem we want to tackle, let's see what +this looks like in the context of our little Kaleidoscope language. +We're going to add two features: + +#. The ability to mutate variables with the '=' operator. +#. The ability to define new variables. + +While the first item is really what this is about, we only have +variables for incoming arguments as well as for induction variables, and +redefining those only goes so far :). Also, the ability to define new +variables is a useful thing regardless of whether you will be mutating +them. Here's a motivating example that shows how we could use these: + +:: + + # Define ':' for sequencing: as a low-precedence operator that ignores operands + # and just returns the RHS. + def binary : 1 (x y) y; + + # Recursive fib, we could do this before. + def fib(x) + if (x < 3) then + 1 + else + fib(x-1)+fib(x-2); + + # Iterative fib. + def fibi(x) + var a = 1, b = 1, c in + (for i = 3, i < x in + c = a + b : + a = b : + b = c) : + b; + + # Call it. + fibi(10); + +In order to mutate variables, we have to change our existing variables +to use the "alloca trick". Once we have that, we'll add our new +operator, then extend Kaleidoscope to support new variable definitions.
+ +Adjusting Existing Variables for Mutation +========================================= + +The symbol table in Kaleidoscope is managed at code generation time by +the '``NamedValues``' map. This map currently keeps track of the LLVM +"Value\*" that holds the double value for the named variable. In order +to support mutation, we need to change this slightly, so that +``NamedValues`` holds the *memory location* of the variable in question. +Note that this change is a refactoring: it changes the structure of the +code, but does not (by itself) change the behavior of the compiler. All +of these changes are isolated in the Kaleidoscope code generator. + +At this point in Kaleidoscope's development, it only supports variables +for two things: incoming arguments to functions and the induction +variable of 'for' loops. For consistency, we'll allow mutation of these +variables in addition to other user-defined variables. This means that +these will both need memory locations. + +To start our transformation of Kaleidoscope, we'll change the +NamedValues map so that it maps to AllocaInst\* instead of Value\*. Once +we do this, the C++ compiler will tell us what parts of the code we need +to update: + +.. code-block:: c++ + + static std::map<std::string, AllocaInst*> NamedValues; + +Also, since we will need to create these allocas, we'll use a helper +function that ensures that the allocas are created in the entry block of +the function: + +.. code-block:: c++ + + /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of + /// the function. This is used for mutable variables etc.
+ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + const std::string &VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + VarName.c_str()); + } + +This funny looking code creates an IRBuilder object that is pointing at +the first instruction (.begin()) of the entry block. It then creates an +alloca with the expected name and returns it. Because all values in +Kaleidoscope are doubles, there is no need to pass in a type to use. + +With this in place, the first functionality change we want to make is to +variable references. In our new scheme, variables live on the stack, so +code generating a reference to them actually needs to produce a load +from the stack slot: + +.. code-block:: c++ + + Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (V == 0) return ErrorV("Unknown variable name"); + + // Load the value. + return Builder.CreateLoad(V, Name.c_str()); + } + +As you can see, this is pretty straightforward. Now we need to update +the things that define the variables to set up the alloca. We'll start +with ``ForExprAST::Codegen`` (see the `full code listing <#code>`_ for +the unabridged code): + +.. code-block:: c++ + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + ... + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. 
+ Value *CurVar = Builder.CreateLoad(Alloca); + Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + ... + +This code is virtually identical to the code `before we allowed mutable +variables <LangImpl5.html#forcodegen>`_. The big difference is that we +no longer have to construct a PHI node, and we use load/store to access +the variable as needed. + +To support mutable argument variables, we need to also make allocas for +them. The code for this is also pretty simple: + +.. code-block:: c++ + + /// CreateArgumentAllocas - Create an alloca for each argument and register the + /// argument in the symbol table so that references to it will succeed. + void PrototypeAST::CreateArgumentAllocas(Function *F) { + Function::arg_iterator AI = F->arg_begin(); + for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); + + // Store the initial value into the alloca. + Builder.CreateStore(AI, Alloca); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = Alloca; + } + } + +For each argument, we make an alloca, store the input value to the +function into the alloca, and register the alloca as the memory location +for the argument. This method gets invoked by ``FunctionAST::Codegen`` +right after it sets up the entry block for the function. + +The final missing piece is adding the mem2reg pass, which allows us to +get good codegen once again: + +.. code-block:: c++ + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Promote allocas to registers. + OurFPM.add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions.
+ OurFPM.add(createReassociatePass()); + +It is interesting to see what the code looks like before and after the +mem2reg optimization runs. For example, this is the before/after code +for our recursive fib function. Before the optimization: + +.. code-block:: llvm + + define double @fib(double %x) { + entry: + %x1 = alloca double + store double %x, double* %x1 + %x2 = load double* %x1 + %cmptmp = fcmp ult double %x2, 3.000000e+00 + %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 + br i1 %ifcond, label %then, label %else + + then: ; preds = %entry + br label %ifcont + + else: ; preds = %entry + %x3 = load double* %x1 + %subtmp = fsub double %x3, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) + %x4 = load double* %x1 + %subtmp5 = fsub double %x4, 2.000000e+00 + %calltmp6 = call double @fib(double %subtmp5) + %addtmp = fadd double %calltmp, %calltmp6 + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ] + ret double %iftmp + } + +Here there is only one variable (x, the input argument) but you can +still see the extremely simple-minded code generation strategy we are +using. In the entry block, an alloca is created, and the initial input +value is stored into it. Each reference to the variable does a reload +from the stack. Also, note that we didn't modify the if/then/else +expression, so it still inserts a PHI node. While we could make an +alloca for it, it is actually easier to create a PHI node for it, so we +still just make the PHI. + +Here is the code after the mem2reg pass runs: + +.. 
code-block:: llvm + + define double @fib(double %x) { + entry: + %cmptmp = fcmp ult double %x, 3.000000e+00 + %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 + br i1 %ifcond, label %then, label %else + + then: + br label %ifcont + + else: + %subtmp = fsub double %x, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) + %subtmp5 = fsub double %x, 2.000000e+00 + %calltmp6 = call double @fib(double %subtmp5) + %addtmp = fadd double %calltmp, %calltmp6 + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ] + ret double %iftmp + } + +This is a trivial case for mem2reg, since there are no redefinitions of +the variable. The point of showing this is to calm your tension about +inserting such blatant inefficiencies :). + +After the rest of the optimizers run, we get: + +.. code-block:: llvm + + define double @fib(double %x) { + entry: + %cmptmp = fcmp ult double %x, 3.000000e+00 + %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp ueq double %booltmp, 0.000000e+00 + br i1 %ifcond, label %else, label %ifcont + + else: + %subtmp = fsub double %x, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) + %subtmp5 = fsub double %x, 2.000000e+00 + %calltmp6 = call double @fib(double %subtmp5) + %addtmp = fadd double %calltmp, %calltmp6 + ret double %addtmp + + ifcont: + ret double 1.000000e+00 + } + +Here we see that the simplifycfg pass decided to clone the return +instruction into the end of the 'else' block. This allowed it to +eliminate some branches and the PHI node. + +Now that all symbol table references are updated to use stack variables, +we'll add the assignment operator. + +New Assignment Operator +======================= + +With our current framework, adding a new assignment operator is really +simple. We will parse it just like any other binary operator, but handle +it internally (instead of allowing the user to define it).
The first +step is to set a precedence: + +.. code-block:: c++ + + int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + +Now that the parser knows the precedence of the binary operator, it +takes care of all the parsing and AST generation. We just need to +implement codegen for the assignment operator. This looks like: + +.. code-block:: c++ + + Value *BinaryExprAST::Codegen() { + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS); + if (!LHSE) + return ErrorV("destination of '=' must be a variable"); + +Unlike the rest of the binary operators, our assignment operator doesn't +follow the "emit LHS, emit RHS, do computation" model. As such, it is +handled as a special case before the other binary operators are handled. +The other strange thing is that it requires the LHS to be a variable. It +is invalid to have "(x+1) = expr" - only things like "x = expr" are +allowed. + +.. code-block:: c++ + + // Codegen the RHS. + Value *Val = RHS->Codegen(); + if (Val == 0) return 0; + + // Look up the name. + Value *Variable = NamedValues[LHSE->getName()]; + if (Variable == 0) return ErrorV("Unknown variable name"); + + Builder.CreateStore(Val, Variable); + return Val; + } + ... + +Once we have the variable, codegen'ing the assignment is +straightforward: we emit the RHS of the assignment, create a store, and +return the computed value. Returning a value allows for chained +assignments like "X = (Y = Z)". + +Now that we have an assignment operator, we can mutate loop variables +and arguments. For example, we can now run code like this: + +:: + + # Function to print a double.
+ extern printd(x); + + # Define ':' for sequencing: as a low-precedence operator that ignores operands + # and just returns the RHS. + def binary : 1 (x y) y; + + def test(x) + printd(x) : + x = 4 : + printd(x); + + test(123); + +When run, this example prints "123" and then "4", showing that we did +actually mutate the value! Okay, we have now officially implemented our +goal: getting this to work requires SSA construction in the general +case. However, to be really useful, we want the ability to define our +own local variables, let's add this next! + +User-defined Local Variables +============================ + +Adding var/in is just like any other extensions we made to +Kaleidoscope: we extend the lexer, the parser, the AST and the code +generator. The first step for adding our new 'var/in' construct is to +extend the lexer. As before, this is pretty trivial, the code looks like +this: + +.. code-block:: c++ + + enum Token { + ... + // var definition + tok_var = -13 + ... + } + ... + static int gettok() { + ... + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + if (IdentifierStr == "var") return tok_var; + return tok_identifier; + ... + +The next step is to define the AST node that we will construct. For +var/in, it looks like this: + +.. code-block:: c++ + + /// VarExprAST - Expression class for var/in + class VarExprAST : public ExprAST { + std::vector<std::pair<std::string, ExprAST*> > VarNames; + ExprAST *Body; + public: + VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames, + ExprAST *body) + : VarNames(varnames), Body(body) {} + + virtual Value *Codegen(); + }; + +var/in allows a list of names to be defined all at once, and each name +can optionally have an initializer value. As such, we capture this +information in the VarNames vector. Also, var/in has a body, this body +is allowed to access the variables defined by the var/in. + +With this in place, we can define the parser pieces.
The first thing we +do is add it as a primary expression: + +.. code-block:: c++ + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + /// ::= ifexpr + /// ::= forexpr + /// ::= varexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + case tok_var: return ParseVarExpr(); + } + } + +Next we define ParseVarExpr: + +.. code-block:: c++ + + /// varexpr ::= 'var' identifier ('=' expression)? + // (',' identifier ('=' expression)?)* 'in' expression + static ExprAST *ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector<std::pair<std::string, ExprAST*> > VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return Error("expected identifier after var"); + +The first part of this code parses the list of identifier/expr pairs +into the local ``VarNames`` vector. + +.. code-block:: c++ + + while (1) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. + + // Read the optional initializer. + ExprAST *Init = 0; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (Init == 0) return 0; + } + + VarNames.push_back(std::make_pair(Name, Init)); + + // End of var list, exit loop. + if (CurTok != ',') break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return Error("expected identifier list after var"); + } + +Once all the variables are parsed, we then parse the body and create the +AST node: + +.. code-block:: c++ + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return Error("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'.
+ + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new VarExprAST(VarNames, Body); + } + +Now that we can parse and represent the code, we need to support +emission of LLVM IR for it. This code starts out with: + +.. code-block:: c++ + + Value *VarExprAST::Codegen() { + std::vector<AllocaInst *> OldBindings; + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second; + +Basically it loops over all the variables, installing them one at a +time. For each variable we put into the symbol table, we remember the +previous value that we replace in OldBindings. + +.. code-block:: c++ + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. + Value *InitVal; + if (Init) { + InitVal = Init->Codegen(); + if (InitVal == 0) return 0; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); + } + + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + Builder.CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. + NamedValues[VarName] = Alloca; + } + +There are more comments here than code. The basic idea is that we emit +the initializer, create the alloca, then update the symbol table to +point to it. Once all the variables are installed in the symbol table, +we evaluate the body of the var/in expression: + +.. code-block:: c++ + + // Codegen the body, now that all vars are in scope.
+ Value *BodyVal = Body->Codegen(); + if (BodyVal == 0) return 0; + +Finally, before returning, we restore the previous variable bindings: + +.. code-block:: c++ + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; + } + +The end result of all of this is that we get properly scoped variable +definitions, and we even (trivially) allow mutation of them :). + +With this, we completed what we set out to do. Our nice iterative fib +example from the intro compiles and runs just fine. The mem2reg pass +optimizes all of our stack variables into SSA registers, inserting PHI +nodes where needed, and our front-end remains simple: no "iterated +dominance frontier" computation anywhere in sight. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +mutable variables and var/in support. To build this example, use: + +.. code-block:: bash + + # Compile + clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy + # Run + ./toy + +Here is the code: + +.. 
code-block:: c++ + + #include "llvm/DerivedTypes.h" + #include "llvm/ExecutionEngine/ExecutionEngine.h" + #include "llvm/ExecutionEngine/JIT.h" + #include "llvm/IRBuilder.h" + #include "llvm/LLVMContext.h" + #include "llvm/Module.h" + #include "llvm/PassManager.h" + #include "llvm/Analysis/Verifier.h" + #include "llvm/Analysis/Passes.h" + #include "llvm/DataLayout.h" + #include "llvm/Transforms/Scalar.h" + #include "llvm/Support/TargetSelect.h" + #include <cstdio> + #include <string> + #include <map> + #include <vector> + using namespace llvm; + + //===----------------------------------------------------------------------===// + // Lexer + //===----------------------------------------------------------------------===// + + // The lexer returns tokens [0-255] if it is an unknown character, otherwise one + // of these for known things. + enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10, + + // operators + tok_binary = -11, tok_unary = -12, + + // var definition + tok_var = -13 + }; + + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number + + /// gettok - Return the next token from standard input. + static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace.
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + if (IdentifierStr == "var") return tok_var; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; + } + + //===----------------------------------------------------------------------===// + // Abstract Syntax Tree (aka Parse Tree) + //===----------------------------------------------------------------------===// + + /// ExprAST - Base class for all expression nodes. + class ExprAST { + public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; + }; + + /// NumberExprAST - Expression class for numeric literals like "1.0". 
+ class NumberExprAST : public ExprAST { + double Val; + public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); + }; + + /// VariableExprAST - Expression class for referencing a variable, like "a". + class VariableExprAST : public ExprAST { + std::string Name; + public: + VariableExprAST(const std::string &name) : Name(name) {} + const std::string &getName() const { return Name; } + virtual Value *Codegen(); + }; + + /// UnaryExprAST - Expression class for a unary operator. + class UnaryExprAST : public ExprAST { + char Opcode; + ExprAST *Operand; + public: + UnaryExprAST(char opcode, ExprAST *operand) + : Opcode(opcode), Operand(operand) {} + virtual Value *Codegen(); + }; + + /// BinaryExprAST - Expression class for a binary operator. + class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; + public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); + }; + + /// CallExprAST - Expression class for function calls. + class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; + public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); + }; + + /// IfExprAST - Expression class for if/then/else. + class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; + public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); + }; + + /// ForExprAST - Expression class for for/in.
+ class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; + public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); + }; + + /// VarExprAST - Expression class for var/in + class VarExprAST : public ExprAST { + std::vector<std::pair<std::string, ExprAST*> > VarNames; + ExprAST *Body; + public: + VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames, + ExprAST *body) + : VarNames(varnames), Body(body) {} + + virtual Value *Codegen(); + }; + + /// PrototypeAST - This class represents the "prototype" for a function, + /// which captures its name, and its argument names (thus implicitly the number + /// of arguments the function takes), as well as if it is an operator. + class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool isOperator; + unsigned Precedence; // Precedence if a binary op. + public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args, + bool isoperator = false, unsigned prec = 0) + : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} + + bool isUnaryOp() const { return isOperator && Args.size() == 1; } + bool isBinaryOp() const { return isOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size()-1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } + + Function *Codegen(); + + void CreateArgumentAllocas(Function *F); + }; + + /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; + public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); + }; + + //===----------------------------------------------------------------------===// + // Parser + //===----------------------------------------------------------------------===// + + /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current + /// token the parser is looking at. getNextToken reads another token from the + /// lexer and updates CurTok with its results. + static int CurTok; + static int getNextToken() { + return CurTok = gettok(); + } + + /// BinopPrecedence - This holds the precedence for each binary operator that is + /// defined. + static std::map<char, int> BinopPrecedence; + + /// GetTokPrecedence - Get the precedence of the pending binary operator token. + static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; + } + + /// Error* - These are little helper functions for error handling. + ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} + PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } + FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + + static ExprAST *ParseExpression(); + + /// identifierexpr + /// ::= identifier + /// ::= identifier '(' expression* ')' + static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call.
+ getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); + } + + /// numberexpr ::= number + static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; + } + + /// parenexpr ::= '(' expression ')' + static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; + } + + /// ifexpr ::= 'if' expression 'then' expression 'else' expression + static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); + } + + /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression + static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='.
+ + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); + } + + /// varexpr ::= 'var' identifier ('=' expression)? + // (',' identifier ('=' expression)?)* 'in' expression + static ExprAST *ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector<std::pair<std::string, ExprAST*> > VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return Error("expected identifier after var"); + + while (1) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. + + // Read the optional initializer. + ExprAST *Init = 0; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (Init == 0) return 0; + } + + VarNames.push_back(std::make_pair(Name, Init)); + + // End of var list, exit loop. + if (CurTok != ',') break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return Error("expected identifier list after var"); + } + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return Error("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'.
+ + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new VarExprAST(VarNames, Body); + } + + /// primary + /// ::= identifierexpr + /// ::= numberexpr + /// ::= parenexpr + /// ::= ifexpr + /// ::= forexpr + /// ::= varexpr + static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + case tok_var: return ParseVarExpr(); + } + } + + /// unary + /// ::= primary + /// ::= '!' unary + static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; + } + + /// binoprhs + /// ::= ('+' unary)* + static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + ExprAST *RHS = ParseUnary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. 
+ LHS = new BinaryExprAST(BinOp, LHS, RHS); + } + } + + /// expression + /// ::= unary binoprhs + /// + static ExprAST *ParseExpression() { + ExprAST *LHS = ParseUnary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); + } + + /// prototype + /// ::= id '(' id* ')' + /// ::= binary LETTER number? (id, id) + /// ::= unary LETTER (id) + static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return ErrorP("Invalid precedecnce: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return ErrorP("Invalid number of operands for operator"); + + return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); + } + + /// definition ::= 'def' prototype expression + static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. 
+ PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; + } + + /// toplevelexpr ::= expression + static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; + } + + /// external ::= 'extern' prototype + static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); + } + + //===----------------------------------------------------------------------===// + // Code Generation + //===----------------------------------------------------------------------===// + + static Module *TheModule; + static IRBuilder<> Builder(getGlobalContext()); + static std::map NamedValues; + static FunctionPassManager *TheFPM; + + Value *ErrorV(const char *Str) { Error(Str); return 0; } + + /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of + /// the function. This is used for mutable variables etc. + static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + const std::string &VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + VarName.c_str()); + } + + Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); + } + + Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (V == 0) return ErrorV("Unknown variable name"); + + // Load the value. 
+ return Builder.CreateLoad(V, Name.c_str()); + } + + Value *UnaryExprAST::Codegen() { + Value *OperandV = Operand->Codegen(); + if (OperandV == 0) return 0; + + Function *F = TheModule->getFunction(std::string("unary")+Opcode); + if (F == 0) + return ErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); + } + + Value *BinaryExprAST::Codegen() { + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + VariableExprAST *LHSE = dynamic_cast(LHS); + if (!LHSE) + return ErrorV("destination of '=' must be a variable"); + // Codegen the RHS. + Value *Val = RHS->Codegen(); + if (Val == 0) return 0; + + // Look up the name. + Value *Variable = NamedValues[LHSE->getName()]; + if (Variable == 0) return ErrorV("Unknown variable name"); + + Builder.CreateStore(Val, Variable); + return Val; + } + + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = TheModule->getFunction(std::string("binary")+Op); + assert(F && "binary operator not found!"); + + Value *Ops[2] = { L, R }; + return Builder.CreateCall(F, Ops, "binop"); + } + + Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. 
+ if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); + } + + Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; + } + + Value *ForExprAST::Codegen() { + // Output this as: + // var = alloca double + // ... 
+ // start = startexpr + // store start -> var + // goto loop + // loop: + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // endcond = endexpr + // + // curvar = load var + // nextvar = curvar + step + // store nextvar -> var + // br endcond, loop, endloop + // outloop: + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. 
+ Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); + } + + Value *VarExprAST::Codegen() { + std::vector OldBindings; + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second; + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. + Value *InitVal; + if (Init) { + InitVal = Init->Codegen(); + if (InitVal == 0) return 0; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); + } + + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + Builder.CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. 
+ NamedValues[VarName] = Alloca; + } + + // Codegen the body, now that all vars are in scope. + Value *BodyVal = Body->Codegen(); + if (BodyVal == 0) return 0; + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; + } + + Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) + AI->setName(Args[Idx]); + + return F; + } + + /// CreateArgumentAllocas - Create an alloca for each argument and register the + /// argument in the symbol table so that references to it will succeed. + void PrototypeAST::CreateArgumentAllocas(Function *F) { + Function::arg_iterator AI = F->arg_begin(); + for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); + + // Store the initial value into the alloca. 
+ Builder.CreateStore(AI, Alloca); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = Alloca; + } + } + + Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Add all arguments to the symbol table and create their allocas. + Proto->CreateArgumentAllocas(TheFunction); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; + } + + //===----------------------------------------------------------------------===// + // Top-Level parsing and JIT Driver + //===----------------------------------------------------------------------===// + + static ExecutionEngine *TheExecutionEngine; + + static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. 
+ getNextToken(); + } + } + + static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } + } + + /// top ::= definition | external | expression | ';' + static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } + } + + //===----------------------------------------------------------------------===// + // "Library" functions that can be "extern'd" from user code. + //===----------------------------------------------------------------------===// + + /// putchard - putchar that takes a double and returns 0. + extern "C" + double putchard(double X) { + putchar((char)X); + return 0; + } + + /// printd - printf that takes a double prints it as "%f\n", returning 0. + extern "C" + double printd(double X) { + printf("%f\n", X); + return 0; + } + + //===----------------------------------------------------------------------===// + // Main driver code. + //===----------------------------------------------------------------------===// + + int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. 
+ BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Promote allocas to registers. + OurFPM.add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; + } + +`Next: Conclusion and other useful LLVM tidbits `_ + diff --git a/docs/tutorial/LangImpl8.html b/docs/tutorial/LangImpl8.html deleted file mode 100644 index 50fcd8c6998f..000000000000 --- a/docs/tutorial/LangImpl8.html +++ /dev/null @@ -1,359 +0,0 @@ - - - - - Kaleidoscope: Conclusion and other useful LLVM tidbits - - - - - - - -

Kaleidoscope: Conclusion and other useful LLVM tidbits

- - - - -
-

Written by Chris Lattner

-
- - -

Tutorial Conclusion

- - -
- -

Welcome to the final chapter of the "Implementing a -language with LLVM" tutorial. In the course of this tutorial, we have grown -our little Kaleidoscope language from being a useless toy, to being a -semi-interesting (but probably still useless) toy. :)

- -

It is interesting to see how far we've come, and how little code it has -taken. We built the entire lexer, parser, AST, code generator, and an -interactive run-loop (with a JIT!) by-hand in under 700 lines of -(non-comment/non-blank) code.

- -

Our little language supports a couple of interesting features: it supports -user defined binary and unary operators, it uses JIT compilation for immediate -evaluation, and it supports a few control flow constructs with SSA construction. -

- -

Part of the idea of this tutorial was to show you how easy and fun it can be -to define, build, and play with languages. Building a compiler need not be a -scary or mystical process! Now that you've seen some of the basics, I strongly -encourage you to take the code and hack on it. For example, try adding:

- -
    -
  • global variables - While global variables have questional value in -modern software engineering, they are often useful when putting together quick -little hacks like the Kaleidoscope compiler itself. Fortunately, our current -setup makes it very easy to add global variables: just have value lookup check -to see if an unresolved variable is in the global variable symbol table before -rejecting it. To create a new global variable, make an instance of the LLVM -GlobalVariable class.
  • - -
  • typed variables - Kaleidoscope currently only supports variables of -type double. This gives the language a very nice elegance, because only -supporting one type means that you never have to specify types. Different -languages have different ways of handling this. The easiest way is to require -the user to specify types for every variable definition, and record the type -of the variable in the symbol table along with its Value*.
  • - -
  • arrays, structs, vectors, etc - Once you add types, you can start -extending the type system in all sorts of interesting ways. Simple arrays are -very easy and are quite useful for many different applications. Adding them is -mostly an exercise in learning how the LLVM getelementptr instruction works: it -is so nifty/unconventional, it has its own FAQ! If you add support -for recursive types (e.g. linked lists), make sure to read the section in the LLVM -Programmer's Manual that describes how to construct them.
  • - -
  • standard runtime - Our current language allows the user to access -arbitrary external functions, and we use it for things like "printd" and -"putchard". As you extend the language to add higher-level constructs, often -these constructs make the most sense if they are lowered to calls into a -language-supplied runtime. For example, if you add hash tables to the language, -it would probably make sense to add the routines to a runtime, instead of -inlining them all the way.
  • - -
  • memory management - Currently we can only access the stack in -Kaleidoscope. It would also be useful to be able to allocate heap memory, -either with calls to the standard libc malloc/free interface or with a garbage -collector. If you would like to use garbage collection, note that LLVM fully -supports Accurate Garbage Collection -including algorithms that move objects and need to scan/update the stack.
  • - -
  • debugger support - LLVM supports generation of DWARF Debug info which is understood by -common debuggers like GDB. Adding support for debug info is fairly -straightforward. The best way to understand it is to compile some C/C++ code -with "llvm-gcc -g -O0" and taking a look at what it produces.
  • - -
  • exception handling support - LLVM supports generation of zero cost exceptions which interoperate -with code compiled in other languages. You could also generate code by -implicitly making every function return an error value and checking it. You -could also make explicit use of setjmp/longjmp. There are many different ways -to go here.
  • - -
  • object orientation, generics, database access, complex numbers, -geometric programming, ... - Really, there is -no end of crazy features that you can add to the language.
  • - -
  • unusual domains - We've been talking about applying LLVM to a domain -that many people are interested in: building a compiler for a specific language. -However, there are many other domains that can use compiler technology that are -not typically considered. For example, LLVM has been used to implement OpenGL -graphics acceleration, translate C++ code to ActionScript, and many other -cute and clever things. Maybe you will be the first to JIT compile a regular -expression interpreter into native code with LLVM?
  • - -
- -

-Have fun - try doing something crazy and unusual. Building a language like -everyone else always has, is much less fun than trying something a little crazy -or off the wall and seeing how it turns out. If you get stuck or want to talk -about it, feel free to email the llvmdev mailing -list: it has lots of people who are interested in languages and are often -willing to help out. -

- -

Before we end this tutorial, I want to talk about some "tips and tricks" for generating -LLVM IR. These are some of the more subtle things that may not be obvious, but -are very useful if you want to take advantage of LLVM's capabilities.

- -
- - -

Properties of the LLVM IR

- - -
- -

We have a couple common questions about code in the LLVM IR form - lets just -get these out of the way right now, shall we?

- - -

Target Independence

- - -
- -

Kaleidoscope is an example of a "portable language": any program written in -Kaleidoscope will work the same way on any target that it runs on. Many other -languages have this property, e.g. lisp, java, haskell, javascript, python, etc -(note that while these languages are portable, not all their libraries are).

- -

One nice aspect of LLVM is that it is often capable of preserving target -independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled -program and run it on any target that LLVM supports, even emitting C code and -compiling that on targets that LLVM doesn't support natively. You can trivially -tell that the Kaleidoscope compiler generates target-independent code because it -never queries for any target-specific information when generating code.

- -

The fact that LLVM provides a compact, target-independent, representation for -code gets a lot of people excited. Unfortunately, these people are usually -thinking about C or a language from the C family when they are asking questions -about language portability. I say "unfortunately", because there is really no -way to make (fully general) C code portable, other than shipping the source code -around (and of course, C source code is not actually portable in general -either - ever port a really old application from 32- to 64-bits?).

- -

The problem with C (again, in its full generality) is that it is heavily -laden with target specific assumptions. As one simple example, the preprocessor -often destructively removes target-independence from the code when it processes -the input text:

- -
-
-#ifdef __i386__
-  int X = 1;
-#else
-  int X = 42;
-#endif
-
-
- -

While it is possible to engineer more and more complex solutions to problems -like this, it cannot be solved in full generality in a way that is better than shipping -the actual source code.

- -

That said, there are interesting subsets of C that can be made portable. If -you are willing to fix primitive types to a fixed size (say int = 32-bits, -and long = 64-bits), don't care about ABI compatibility with existing binaries, -and are willing to give up some other minor features, you can have portable -code. This can make sense for specialized domains such as an -in-kernel language.

- -
- - -

Safety Guarantees

- - -
- -

Many of the languages above are also "safe" languages: it is impossible for -a program written in Java to corrupt its address space and crash the process -(assuming the JVM has no bugs). -Safety is an interesting property that requires a combination of language -design, runtime support, and often operating system support.

- -

It is certainly possible to implement a safe language in LLVM, but LLVM IR -does not itself guarantee safety. The LLVM IR allows unsafe pointer casts, -use after free bugs, buffer over-runs, and a variety of other problems. Safety -needs to be implemented as a layer on top of LLVM and, conveniently, several -groups have investigated this. Ask on the llvmdev mailing -list if you are interested in more details.

- -
- - -

Language-Specific Optimizations

- - -
- -

One thing about LLVM that turns off many people is that it does not solve all -the world's problems in one system (sorry 'world hunger', someone else will have -to solve you some other day). One specific complaint is that people perceive -LLVM as being incapable of performing high-level language-specific optimization: -LLVM "loses too much information".

- -

Unfortunately, this is really not the place to give you a full and unified -version of "Chris Lattner's theory of compiler design". Instead, I'll make a -few observations:

- -

First, you're right that LLVM does lose information. For example, as of this -writing, there is no way to distinguish in the LLVM IR whether an SSA-value came -from a C "int" or a C "long" on an ILP32 machine (other than debug info). Both -get compiled down to an 'i32' value and the information about what it came from -is lost. The more general issue here, is that the LLVM type system uses -"structural equivalence" instead of "name equivalence". Another place this -surprises people is if you have two types in a high-level language that have the -same structure (e.g. two different structs that have a single int field): these -types will compile down into a single LLVM type and it will be impossible to -tell what it came from.

- -

Second, while LLVM does lose information, LLVM is not a fixed target: we -continue to enhance and improve it in many different ways. In addition to -adding new features (LLVM did not always support exceptions or debug info), we -also extend the IR to capture important information for optimization (e.g. -whether an argument is sign or zero extended, information about pointers -aliasing, etc). Many of the enhancements are user-driven: people want LLVM to -include some specific feature, so they go ahead and extend it.

- -

Third, it is possible and easy to add language-specific -optimizations, and you have a number of choices in how to do it. As one trivial -example, it is easy to add language-specific optimization passes that -"know" things about code compiled for a language. In the case of the C family, -there is an optimization pass that "knows" about the standard C library -functions. If you call "exit(0)" in main(), it knows that it is safe to -optimize that into "return 0;" because C specifies what the 'exit' -function does.

- -

In addition to simple library knowledge, it is possible to embed a variety of -other language-specific information into the LLVM IR. If you have a specific -need and run into a wall, please bring the topic up on the llvmdev list. At the -very worst, you can always treat LLVM as if it were a "dumb code generator" and -implement the high-level optimizations you desire in your front-end, on the -language-specific AST. -

- -
- -
- - -

Tips and Tricks

- - -
- -

There is a variety of useful tips and tricks that you come to know after -working on/with LLVM that aren't obvious at first glance. Instead of letting -everyone rediscover them, this section talks about some of these issues.

- - -

Implementing portable offsetof/sizeof

- - -
- -

One interesting thing that comes up, if you are trying to keep the code -generated by your compiler "target independent", is that you often need to know -the size of some LLVM type or the offset of some field in an llvm structure. -For example, you might need to pass the size of a type into a function that -allocates memory.

- -

Unfortunately, this can vary widely across targets: for example the width of -a pointer is trivially target-specific. However, there is a clever -way to use the getelementptr instruction that allows you to compute this -in a portable way.

- -
- - -

Garbage Collected Stack Frames

- - -
- -

Some languages want to explicitly manage their stack frames, often so that -they are garbage collected or to allow easy implementation of closures. There -are often better ways to implement these features than explicit stack frames, -but LLVM -does support them, if you want. It requires your front-end to convert the -code into Continuation -Passing Style and the use of tail calls (which LLVM also supports).

- -
- -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-07-23 10:51:15 +0200 (Mon, 23 Jul 2012) $ -
- - diff --git a/docs/tutorial/LangImpl8.rst b/docs/tutorial/LangImpl8.rst new file mode 100644 index 000000000000..3534b2e0c931 --- /dev/null +++ b/docs/tutorial/LangImpl8.rst @@ -0,0 +1,267 @@ +====================================================== +Kaleidoscope: Conclusion and other useful LLVM tidbits +====================================================== + +.. contents:: + :local: + +Tutorial Conclusion +=================== + +Welcome to the final chapter of the "`Implementing a language with +LLVM `_" tutorial. In the course of this tutorial, we have +grown our little Kaleidoscope language from being a useless toy, to +being a semi-interesting (but probably still useless) toy. :) + +It is interesting to see how far we've come, and how little code it has +taken. We built the entire lexer, parser, AST, code generator, and an +interactive run-loop (with a JIT!) by-hand in under 700 lines of +(non-comment/non-blank) code. + +Our little language supports a couple of interesting features: it +supports user defined binary and unary operators, it uses JIT +compilation for immediate evaluation, and it supports a few control flow +constructs with SSA construction. + +Part of the idea of this tutorial was to show you how easy and fun it +can be to define, build, and play with languages. Building a compiler +need not be a scary or mystical process! Now that you've seen some of +the basics, I strongly encourage you to take the code and hack on it. +For example, try adding: + +- **global variables** - While global variables have questional value + in modern software engineering, they are often useful when putting + together quick little hacks like the Kaleidoscope compiler itself. + Fortunately, our current setup makes it very easy to add global + variables: just have value lookup check to see if an unresolved + variable is in the global variable symbol table before rejecting it. + To create a new global variable, make an instance of the LLVM + ``GlobalVariable`` class. 
+- **typed variables** - Kaleidoscope currently only supports variables + of type double. This gives the language a very nice elegance, because + only supporting one type means that you never have to specify types. + Different languages have different ways of handling this. The easiest + way is to require the user to specify types for every variable + definition, and record the type of the variable in the symbol table + along with its Value\*. +- **arrays, structs, vectors, etc** - Once you add types, you can start + extending the type system in all sorts of interesting ways. Simple + arrays are very easy and are quite useful for many different + applications. Adding them is mostly an exercise in learning how the + LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction + works: it is so nifty/unconventional, it `has its own + FAQ <../GetElementPtr.html>`_! If you add support for recursive types + (e.g. linked lists), make sure to read the `section in the LLVM + Programmer's Manual <../ProgrammersManual.html#TypeResolve>`_ that + describes how to construct them. +- **standard runtime** - Our current language allows the user to access + arbitrary external functions, and we use it for things like "printd" + and "putchard". As you extend the language to add higher-level + constructs, often these constructs make the most sense if they are + lowered to calls into a language-supplied runtime. For example, if + you add hash tables to the language, it would probably make sense to + add the routines to a runtime, instead of inlining them all the way. +- **memory management** - Currently we can only access the stack in + Kaleidoscope. It would also be useful to be able to allocate heap + memory, either with calls to the standard libc malloc/free interface + or with a garbage collector. 
If you would like to use garbage + collection, note that LLVM fully supports `Accurate Garbage + Collection <../GarbageCollection.html>`_ including algorithms that + move objects and need to scan/update the stack. +- **debugger support** - LLVM supports generation of `DWARF Debug + info <../SourceLevelDebugging.html>`_ which is understood by common + debuggers like GDB. Adding support for debug info is fairly + straightforward. The best way to understand it is to compile some + C/C++ code with "``llvm-gcc -g -O0``" and taking a look at what it + produces. +- **exception handling support** - LLVM supports generation of `zero + cost exceptions <../ExceptionHandling.html>`_ which interoperate with + code compiled in other languages. You could also generate code by + implicitly making every function return an error value and checking + it. You could also make explicit use of setjmp/longjmp. There are + many different ways to go here. +- **object orientation, generics, database access, complex numbers, + geometric programming, ...** - Really, there is no end of crazy + features that you can add to the language. +- **unusual domains** - We've been talking about applying LLVM to a + domain that many people are interested in: building a compiler for a + specific language. However, there are many other domains that can use + compiler technology that are not typically considered. For example, + LLVM has been used to implement OpenGL graphics acceleration, + translate C++ code to ActionScript, and many other cute and clever + things. Maybe you will be the first to JIT compile a regular + expression interpreter into native code with LLVM? + +Have fun - try doing something crazy and unusual. Building a language +like everyone else always has, is much less fun than trying something a +little crazy or off the wall and seeing how it turns out. 
If you get +stuck or want to talk about it, feel free to email the `llvmdev mailing +list `_: it has lots +of people who are interested in languages and are often willing to help +out. + +Before we end this tutorial, I want to talk about some "tips and tricks" +for generating LLVM IR. These are some of the more subtle things that +may not be obvious, but are very useful if you want to take advantage of +LLVM's capabilities. + +Properties of the LLVM IR +========================= + +We have a couple common questions about code in the LLVM IR form - lets +just get these out of the way right now, shall we? + +Target Independence +------------------- + +Kaleidoscope is an example of a "portable language": any program written +in Kaleidoscope will work the same way on any target that it runs on. +Many other languages have this property, e.g. lisp, java, haskell, +javascript, python, etc (note that while these languages are portable, +not all their libraries are). + +One nice aspect of LLVM is that it is often capable of preserving target +independence in the IR: you can take the LLVM IR for a +Kaleidoscope-compiled program and run it on any target that LLVM +supports, even emitting C code and compiling that on targets that LLVM +doesn't support natively. You can trivially tell that the Kaleidoscope +compiler generates target-independent code because it never queries for +any target-specific information when generating code. + +The fact that LLVM provides a compact, target-independent, +representation for code gets a lot of people excited. Unfortunately, +these people are usually thinking about C or a language from the C +family when they are asking questions about language portability. I say +"unfortunately", because there is really no way to make (fully general) +C code portable, other than shipping the source code around (and of +course, C source code is not actually portable in general either - ever +port a really old application from 32- to 64-bits?). 
+ +The problem with C (again, in its full generality) is that it is heavily +laden with target specific assumptions. As one simple example, the +preprocessor often destructively removes target-independence from the +code when it processes the input text: + +.. code-block:: c + + #ifdef __i386__ + int X = 1; + #else + int X = 42; + #endif + +While it is possible to engineer more and more complex solutions to +problems like this, it cannot be solved in full generality in a way that +is better than shipping the actual source code. + +That said, there are interesting subsets of C that can be made portable. +If you are willing to fix primitive types to a fixed size (say int = +32-bits, and long = 64-bits), don't care about ABI compatibility with +existing binaries, and are willing to give up some other minor features, +you can have portable code. This can make sense for specialized domains +such as an in-kernel language. + +Safety Guarantees +----------------- + +Many of the languages above are also "safe" languages: it is impossible +for a program written in Java to corrupt its address space and crash the +process (assuming the JVM has no bugs). Safety is an interesting +property that requires a combination of language design, runtime +support, and often operating system support. + +It is certainly possible to implement a safe language in LLVM, but LLVM +IR does not itself guarantee safety. The LLVM IR allows unsafe pointer +casts, use after free bugs, buffer over-runs, and a variety of other +problems. Safety needs to be implemented as a layer on top of LLVM and, +conveniently, several groups have investigated this. Ask on the `llvmdev +mailing list `_ if +you are interested in more details. + +Language-Specific Optimizations +------------------------------- + +One thing about LLVM that turns off many people is that it does not +solve all the world's problems in one system (sorry 'world hunger', +someone else will have to solve you some other day). 
One specific +complaint is that people perceive LLVM as being incapable of performing +high-level language-specific optimization: LLVM "loses too much +information". + +Unfortunately, this is really not the place to give you a full and +unified version of "Chris Lattner's theory of compiler design". Instead, +I'll make a few observations: + +First, you're right that LLVM does lose information. For example, as of +this writing, there is no way to distinguish in the LLVM IR whether an +SSA-value came from a C "int" or a C "long" on an ILP32 machine (other +than debug info). Both get compiled down to an 'i32' value and the +information about what it came from is lost. The more general issue +here, is that the LLVM type system uses "structural equivalence" instead +of "name equivalence". Another place this surprises people is if you +have two types in a high-level language that have the same structure +(e.g. two different structs that have a single int field): these types +will compile down into a single LLVM type and it will be impossible to +tell what it came from. + +Second, while LLVM does lose information, LLVM is not a fixed target: we +continue to enhance and improve it in many different ways. In addition +to adding new features (LLVM did not always support exceptions or debug +info), we also extend the IR to capture important information for +optimization (e.g. whether an argument is sign or zero extended, +information about pointers aliasing, etc). Many of the enhancements are +user-driven: people want LLVM to include some specific feature, so they +go ahead and extend it. + +Third, it is *possible and easy* to add language-specific optimizations, +and you have a number of choices in how to do it. As one trivial +example, it is easy to add language-specific optimization passes that +"know" things about code compiled for a language. In the case of the C +family, there is an optimization pass that "knows" about the standard C +library functions. 
If you call "exit(0)" in main(), it knows that it is +safe to optimize that into "return 0;" because C specifies what the +'exit' function does. + +In addition to simple library knowledge, it is possible to embed a +variety of other language-specific information into the LLVM IR. If you +have a specific need and run into a wall, please bring the topic up on +the llvmdev list. At the very worst, you can always treat LLVM as if it +were a "dumb code generator" and implement the high-level optimizations +you desire in your front-end, on the language-specific AST. + +Tips and Tricks +=============== + +There is a variety of useful tips and tricks that you come to know after +working on/with LLVM that aren't obvious at first glance. Instead of +letting everyone rediscover them, this section talks about some of these +issues. + +Implementing portable offsetof/sizeof +------------------------------------- + +One interesting thing that comes up, if you are trying to keep the code +generated by your compiler "target independent", is that you often need +to know the size of some LLVM type or the offset of some field in an +llvm structure. For example, you might need to pass the size of a type +into a function that allocates memory. + +Unfortunately, this can vary widely across targets: for example the +width of a pointer is trivially target-specific. However, there is a +`clever way to use the getelementptr +instruction `_ +that allows you to compute this in a portable way. + +Garbage Collected Stack Frames +------------------------------ + +Some languages want to explicitly manage their stack frames, often so +that they are garbage collected or to allow easy implementation of +closures. There are often better ways to implement these features than +explicit stack frames, but `LLVM does support +them, `_ +if you want. It requires your front-end to convert the code into +`Continuation Passing +Style `_ and +the use of tail calls (which LLVM also supports). 
+ diff --git a/docs/tutorial/OCamlLangImpl1.html b/docs/tutorial/OCamlLangImpl1.html deleted file mode 100644 index 86a395a3a873..000000000000 --- a/docs/tutorial/OCamlLangImpl1.html +++ /dev/null @@ -1,365 +0,0 @@ - - - - - Kaleidoscope: Tutorial Introduction and the Lexer - - - - - - - - -

Kaleidoscope: Tutorial Introduction and the Lexer

- - - -
-

- Written by Chris Lattner - and Erick Tryzelaar -

-
- - -

Tutorial Introduction

- - -
- -

Welcome to the "Implementing a language with LLVM" tutorial. This tutorial -runs through the implementation of a simple language, showing how fun and -easy it can be. This tutorial will get you up and started as well as help to -build a framework you can extend to other languages. The code in this tutorial -can also be used as a playground to hack on other LLVM specific things. -

- -

-The goal of this tutorial is to progressively unveil our language, describing -how it is built up over time. This will let us cover a fairly broad range of -language design and LLVM-specific usage issues, showing and explaining the code -for it all along the way, without overwhelming you with tons of details up -front.

- -

It is useful to point out ahead of time that this tutorial is really about -teaching compiler techniques and LLVM specifically, not about teaching -modern and sane software engineering principles. In practice, this means that -we'll take a number of shortcuts to simplify the exposition. For example, the -code leaks memory, uses global variables all over the place, doesn't use nice -design patterns like visitors, etc... but it -is very simple. If you dig in and use the code as a basis for future projects, -fixing these deficiencies shouldn't be hard.

- -

I've tried to put this tutorial together in a way that makes chapters easy to -skip over if you are already familiar with or are uninterested in the various -pieces. The structure of the tutorial is: -

- -
    -
  • Chapter #1: Introduction to the Kaleidoscope -language, and the definition of its Lexer - This shows where we are going -and the basic functionality that we want it to do. In order to make this -tutorial maximally understandable and hackable, we choose to implement -everything in Objective Caml instead of using lexer and parser generators. -LLVM obviously works just fine with such tools, feel free to use one if you -prefer.
  • -
  • Chapter #2: Implementing a Parser and -AST - With the lexer in place, we can talk about parsing techniques and -basic AST construction. This tutorial describes recursive descent parsing and -operator precedence parsing. Nothing in Chapters 1 or 2 is LLVM-specific, -the code doesn't even link in LLVM at this point. :)
  • -
  • Chapter #3: Code generation to LLVM -IR - With the AST ready, we can show off how easy generation of LLVM IR -really is.
  • -
  • Chapter #4: Adding JIT and Optimizer -Support - Because a lot of people are interested in using LLVM as a JIT, -we'll dive right into it and show you the 3 lines it takes to add JIT support. -LLVM is also useful in many other ways, but this is one simple and "sexy" way -to show off its power. :)
  • -
  • Chapter #5: Extending the Language: -Control Flow - With the language up and running, we show how to extend it -with control flow operations (if/then/else and a 'for' loop). This gives us a -chance to talk about simple SSA construction and control flow.
  • -
  • Chapter #6: Extending the Language: -User-defined Operators - This is a silly but fun chapter that talks about -extending the language to let the user program define their own arbitrary -unary and binary operators (with assignable precedence!). This lets us build a -significant piece of the "language" as library routines.
  • -
  • Chapter #7: Extending the Language: -Mutable Variables - This chapter talks about adding user-defined local -variables along with an assignment operator. The interesting part about this -is how easy and trivial it is to construct SSA form in LLVM: no, LLVM does -not require your front-end to construct SSA form!
  • -
  • Chapter #8: Conclusion and other -useful LLVM tidbits - This chapter wraps up the series by talking about -potential ways to extend the language, but also includes a bunch of pointers to -info about "special topics" like adding garbage collection support, exceptions, -debugging, support for "spaghetti stacks", and a bunch of other tips and -tricks.
  • - -
- -

By the end of the tutorial, we'll have written a bit less than 700 -non-comment, non-blank lines of code. With this small amount of code, we'll -have built up a very reasonable compiler for a non-trivial language including -a hand-written lexer, parser, AST, as well as code generation support with a JIT -compiler. While other systems may have interesting "hello world" tutorials, -I think the breadth of this tutorial is a great testament to the strengths of -LLVM and why you should consider it if you're interested in language or compiler -design.

- -

A note about this tutorial: we expect you to extend the language and play -with it on your own. Take the code and go crazy hacking away at it, compilers -don't need to be scary creatures - it can be a lot of fun to play with -languages!

- -
- - -

The Basic Language

- - -
- -

This tutorial will be illustrated with a toy language that we'll call -"Kaleidoscope" (derived -from Greek roots meaning "beautiful", "form", and "view"). -Kaleidoscope is a procedural language that allows you to define functions, use -conditionals, math, etc. Over the course of the tutorial, we'll extend -Kaleidoscope to support the if/then/else construct, a for loop, user defined -operators, JIT compilation with a simple command line interface, etc.

- -

Because we want to keep things simple, the only datatype in Kaleidoscope is a -64-bit floating point type (aka 'float' in O'Caml parlance). As such, all -values are implicitly double precision and the language doesn't require type -declarations. This gives the language a very nice and simple syntax. For -example, the following simple example computes Fibonacci numbers:

- -
-
-# Compute the x'th fibonacci number.
-def fib(x)
-  if x < 3 then
-    1
-  else
-    fib(x-1)+fib(x-2)
-
-# This expression will compute the 40th number.
-fib(40)
-
-
- -

We also allow Kaleidoscope to call into standard library functions (the LLVM -JIT makes this completely trivial). This means that you can use the 'extern' -keyword to define a function before you use it (this is also useful for mutually -recursive functions). For example:

- -
-
-extern sin(arg);
-extern cos(arg);
-extern atan2(arg1 arg2);
-
-atan2(sin(.4), cos(42))
-
-
- -

A more interesting example is included in Chapter 6 where we write a little -Kaleidoscope application that displays -a Mandelbrot Set at various levels of magnification.

- -

Let's dive into the implementation of this language!

- -
- - -

The Lexer

- - -
- -

When it comes to implementing a language, the first thing needed is -the ability to process a text file and recognize what it says. The traditional -way to do this is to use a "lexer" (aka 'scanner') -to break the input up into "tokens". Each token returned by the lexer includes -a token code and potentially some metadata (e.g. the numeric value of a number). -First, we define the possibilities: -

- -
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-
- -

Each token returned by our lexer will be one of the token variant values. -An unknown character like '+' will be returned as Token.Kwd '+'. If -the current token is an identifier, the value will be Token.Ident s. If -the current token is a numeric literal (like 1.0), the value will be -Token.Number 1.0. -

- -

The actual implementation of the lexer is a collection of functions driven -by a function named Lexer.lex. The Lexer.lex function is -called to return the next token from standard input. We will use -Camlp4 -to simplify the tokenization of the standard input. Its definition starts -as:

- -
-
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
-
-
- -

-Lexer.lex works by recursing over a char Stream.t to read -characters one at a time from the standard input. It eats them as it recognizes -them and stores them in a Token.token variant. The first thing that -it has to do is ignore whitespace between tokens. This is accomplished with the -recursive call above.

- -

The next thing Lexer.lex needs to do is recognize identifiers and -specific keywords like "def". Kaleidoscope does this with a pattern match -and a helper function.

- -

-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-...
-
-and lex_ident buffer = parser
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [< stream=lex >] ->
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-
- -

Numeric values are similar:

- -
-
-  (* number: [0-9.]+ *)
-  | [< ' ('0' .. '9' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-...
-
-and lex_number buffer = parser
-  | [< ' ('0' .. '9' | '.' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [< stream=lex >] ->
-      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
-
-
- -

This is all pretty straight-forward code for processing input. When reading -a numeric value from input, we use the OCaml float_of_string function -to convert it to a numeric value that we store in Token.Number. Note -that this isn't doing sufficient error checking: it will raise Failure -if the string is "1.23.45.67". Feel free to extend it :). Next we handle -comments: -

- -
-
-  (* Comment until end of line. *)
-  | [< ' ('#'); stream >] ->
-      lex_comment stream
-
-...
-
-and lex_comment = parser
-  | [< ' ('\n'); stream=lex >] -> stream
-  | [< 'c; e=lex_comment >] -> e
-  | [< >] -> [< >]
-
-
- -

We handle comments by skipping to the end of the line and then returning the -next token. Finally, if the input doesn't match one of the above cases, it is -either an operator character like '+' or the end of the file. These are handled -with this code:

- -
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [< 'c; stream >] ->
-      [< 'Token.Kwd c; lex stream >]
-
-  (* end of stream. *)
-  | [< >] -> [< >]
-
-
- -

With this, we have the complete lexer for the basic Kaleidoscope language -(the full code listing for the Lexer is -available in the next chapter of the -tutorial). Next we'll build a simple parser that -uses this to build an Abstract Syntax Tree. When we have that, we'll -include a driver so that you can use the lexer and parser together. -

- -Next: Implementing a Parser and AST -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- Erick Tryzelaar
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $ -
- - diff --git a/docs/tutorial/OCamlLangImpl1.rst b/docs/tutorial/OCamlLangImpl1.rst new file mode 100644 index 000000000000..94ca3a5aa4d3 --- /dev/null +++ b/docs/tutorial/OCamlLangImpl1.rst @@ -0,0 +1,285 @@ +================================================= +Kaleidoscope: Tutorial Introduction and the Lexer +================================================= + +.. contents:: + :local: + +Tutorial Introduction +===================== + +Welcome to the "Implementing a language with LLVM" tutorial. This +tutorial runs through the implementation of a simple language, showing +how fun and easy it can be. This tutorial will get you up and started as +well as help to build a framework you can extend to other languages. The +code in this tutorial can also be used as a playground to hack on other +LLVM specific things. + +The goal of this tutorial is to progressively unveil our language, +describing how it is built up over time. This will let us cover a fairly +broad range of language design and LLVM-specific usage issues, showing +and explaining the code for it all along the way, without overwhelming +you with tons of details up front. + +It is useful to point out ahead of time that this tutorial is really +about teaching compiler techniques and LLVM specifically, *not* about +teaching modern and sane software engineering principles. In practice, +this means that we'll take a number of shortcuts to simplify the +exposition. For example, the code leaks memory, uses global variables +all over the place, doesn't use nice design patterns like +`visitors `_, etc... but +it is very simple. If you dig in and use the code as a basis for future +projects, fixing these deficiencies shouldn't be hard. + +I've tried to put this tutorial together in a way that makes chapters +easy to skip over if you are already familiar with or are uninterested +in the various pieces. 
The structure of the tutorial is: + +- `Chapter #1 <#language>`_: Introduction to the Kaleidoscope + language, and the definition of its Lexer - This shows where we are + going and the basic functionality that we want it to do. In order to + make this tutorial maximally understandable and hackable, we choose + to implement everything in Objective Caml instead of using lexer and + parser generators. LLVM obviously works just fine with such tools, + feel free to use one if you prefer. +- `Chapter #2 `_: Implementing a Parser and + AST - With the lexer in place, we can talk about parsing techniques + and basic AST construction. This tutorial describes recursive descent + parsing and operator precedence parsing. Nothing in Chapters 1 or 2 + is LLVM-specific, the code doesn't even link in LLVM at this point. + :) +- `Chapter #3 `_: Code generation to LLVM IR - + With the AST ready, we can show off how easy generation of LLVM IR + really is. +- `Chapter #4 `_: Adding JIT and Optimizer + Support - Because a lot of people are interested in using LLVM as a + JIT, we'll dive right into it and show you the 3 lines it takes to + add JIT support. LLVM is also useful in many other ways, but this is + one simple and "sexy" way to shows off its power. :) +- `Chapter #5 `_: Extending the Language: + Control Flow - With the language up and running, we show how to + extend it with control flow operations (if/then/else and a 'for' + loop). This gives us a chance to talk about simple SSA construction + and control flow. +- `Chapter #6 `_: Extending the Language: + User-defined Operators - This is a silly but fun chapter that talks + about extending the language to let the user program define their own + arbitrary unary and binary operators (with assignable precedence!). + This lets us build a significant piece of the "language" as library + routines. 
+- `Chapter #7 `_: Extending the Language: + Mutable Variables - This chapter talks about adding user-defined + local variables along with an assignment operator. The interesting + part about this is how easy and trivial it is to construct SSA form + in LLVM: no, LLVM does *not* require your front-end to construct SSA + form! +- `Chapter #8 `_: Conclusion and other useful + LLVM tidbits - This chapter wraps up the series by talking about + potential ways to extend the language, but also includes a bunch of + pointers to info about "special topics" like adding garbage + collection support, exceptions, debugging, support for "spaghetti + stacks", and a bunch of other tips and tricks. + +By the end of the tutorial, we'll have written a bit less than 700 lines +of non-comment, non-blank, lines of code. With this small amount of +code, we'll have built up a very reasonable compiler for a non-trivial +language including a hand-written lexer, parser, AST, as well as code +generation support with a JIT compiler. While other systems may have +interesting "hello world" tutorials, I think the breadth of this +tutorial is a great testament to the strengths of LLVM and why you +should consider it if you're interested in language or compiler design. + +A note about this tutorial: we expect you to extend the language and +play with it on your own. Take the code and go crazy hacking away at it, +compilers don't need to be scary creatures - it can be a lot of fun to +play with languages! + +The Basic Language +================== + +This tutorial will be illustrated with a toy language that we'll call +"`Kaleidoscope `_" (derived +from "meaning beautiful, form, and view"). Kaleidoscope is a procedural +language that allows you to define functions, use conditionals, math, +etc. Over the course of the tutorial, we'll extend Kaleidoscope to +support the if/then/else construct, a for loop, user defined operators, +JIT compilation with a simple command line interface, etc. 
+ +Because we want to keep things simple, the only datatype in Kaleidoscope +is a 64-bit floating point type (aka 'float' in O'Caml parlance). As +such, all values are implicitly double precision and the language +doesn't require type declarations. This gives the language a very nice +and simple syntax. For example, the following simple example computes +`Fibonacci numbers: `_ + +:: + + # Compute the x'th fibonacci number. + def fib(x) + if x < 3 then + 1 + else + fib(x-1)+fib(x-2) + + # This expression will compute the 40th number. + fib(40) + +We also allow Kaleidoscope to call into standard library functions (the +LLVM JIT makes this completely trivial). This means that you can use the +'extern' keyword to define a function before you use it (this is also +useful for mutually recursive functions). For example: + +:: + + extern sin(arg); + extern cos(arg); + extern atan2(arg1 arg2); + + atan2(sin(.4), cos(42)) + +A more interesting example is included in Chapter 6 where we write a +little Kaleidoscope application that `displays a Mandelbrot +Set `_ at various levels of magnification. + +Let's dive into the implementation of this language! + +The Lexer +========= + +When it comes to implementing a language, the first thing needed is the +ability to process a text file and recognize what it says. The +traditional way to do this is to use a +"`lexer `_" (aka +'scanner') to break the input up into "tokens". Each token returned by +the lexer includes a token code and potentially some metadata (e.g. the +numeric value of a number). First, we define the possibilities: + +.. code-block:: ocaml + + (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) + type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + +Each token returned by our lexer will be one of the token variant +values. 
An unknown character like '+' will be returned as +``Token.Kwd '+'``. If the current token is an identifier, the value will be +``Token.Ident s``. If the current token is a numeric literal (like 1.0), +the value will be ``Token.Number 1.0``. + +The actual implementation of the lexer is a collection of functions +driven by a function named ``Lexer.lex``. The ``Lexer.lex`` function is +called to return the next token from standard input. We will use +`Camlp4 `_ to +simplify the tokenization of the standard input. Its definition starts +as: + +.. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + + let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + +``Lexer.lex`` works by recursing over a ``char Stream.t`` to read +characters one at a time from the standard input. It eats them as it +recognizes them and stores them in a ``Token.token`` variant. The +first thing that it has to do is ignore whitespace between tokens. This +is accomplished with the recursive call above. + +The next thing ``Lexer.lex`` needs to do is recognize identifiers and +specific keywords like "def". Kaleidoscope does this with a pattern +match and a helper function. + +.. code-block:: ocaml + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + ... + + and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + +Numeric values are similar: + +.. 
code-block:: ocaml + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + ... + + and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +This is all pretty straight-forward code for processing input. When +reading a numeric value from input, we use the ocaml ``float_of_string`` +function to convert it to a numeric value that we store in +``Token.Number``. Note that this isn't doing sufficient error checking: +it will raise ``Failure`` if given a malformed string such as "1.23.45.67". Feel free to +extend it :). Next we handle comments: + +.. code-block:: ocaml + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + ... + + and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] + +We handle comments by skipping to the end of the line and then returning +the next token. Finally, if the input doesn't match one of the above +cases, it is either an operator character like '+' or the end of the +file. These are handled with this code: + +.. code-block:: ocaml + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +With this, we have the complete lexer for the basic Kaleidoscope +language (the `full code listing `_ for the +Lexer is available in the `next chapter `_ of the +tutorial). Next we'll `build a simple parser that uses this to build an +Abstract Syntax Tree `_. When we have that, we'll +include a driver so that you can use the lexer and parser together. 
+ +`Next: Implementing a Parser and AST `_ + diff --git a/docs/tutorial/OCamlLangImpl2.html b/docs/tutorial/OCamlLangImpl2.html deleted file mode 100644 index 9bb4c40361c5..000000000000 --- a/docs/tutorial/OCamlLangImpl2.html +++ /dev/null @@ -1,1043 +0,0 @@ - - - - - Kaleidoscope: Implementing a Parser and AST - - - - - - - - -

Kaleidoscope: Implementing a Parser and AST

- - - -
-

- Written by Chris Lattner - and Erick Tryzelaar -

-
- - -

Chapter 2 Introduction

- - -
- -

Welcome to Chapter 2 of the "Implementing a language -with LLVM in Objective Caml" tutorial. This chapter shows you how to use -the lexer, built in Chapter 1, to build a -full parser for our -Kaleidoscope language. Once we have a parser, we'll define and build an Abstract Syntax -Tree (AST).

- -

The parser we will build uses a combination of Recursive Descent -Parsing and Operator-Precedence -Parsing to parse the Kaleidoscope language (the latter for -binary expressions and the former for everything else). Before we get to -parsing though, lets talk about the output of the parser: the Abstract Syntax -Tree.

- -
- - -

The Abstract Syntax Tree (AST)

- - -
- -

The AST for a program captures its behavior in such a way that it is easy for -later stages of the compiler (e.g. code generation) to interpret. We basically -want one object for each construct in the language, and the AST should closely -model the language. In Kaleidoscope, we have expressions, a prototype, and a -function object. We'll start with expressions first:

- -
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-
- -

The code above shows the definition of the base ExprAST class and one -subclass which we use for numeric literals. The important thing to note about -this code is that the Number variant captures the numeric value of the -literal as an instance variable. This allows later phases of the compiler to -know what the stored numeric value is.

- -

Right now we only create the AST, so there are no useful functions on -them. It would be very easy to add a function to pretty print the code, -for example. Here are the other expression AST node definitions that we'll use -in the basic form of the Kaleidoscope language: -

- -
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-
- -

This is all (intentionally) rather straight-forward: variables capture the -variable name, binary operators capture their opcode (e.g. '+'), and calls -capture a function name as well as a list of any argument expressions. One thing -that is nice about our AST is that it captures the language features without -talking about the syntax of the language. Note that there is no discussion about -precedence of binary operators, lexical structure, etc.

- -

For our basic language, these are all of the expression nodes we'll define. -Because it doesn't have conditional control flow, it isn't Turing-complete; -we'll fix that in a later installment. The two things we need next are a way -to talk about the interface to a function, and a way to talk about functions -themselves:

- -
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-
-
- -

In Kaleidoscope, functions are typed with just a count of their arguments. -Since all values are double precision floating point, the type of each argument -doesn't need to be stored anywhere. In a more aggressive and realistic -language, the "expr" variants would probably have a type field.

- -

With this scaffolding, we can now talk about parsing expressions and function -bodies in Kaleidoscope.

- -
- - -

Parser Basics

- - -
- -

Now that we have an AST to build, we need to define the parser code to build -it. The idea here is that we want to parse something like "x+y" (which is -returned as three tokens by the lexer) into an AST that could be generated with -calls like this:

- -
-
-  let x = Variable "x" in
-  let y = Variable "y" in
-  let result = Binary ('+', x, y) in
-  ...
-
-
- -

-The error handling routines make use of the builtin Stream.Failure and -Stream.Errors. Stream.Failure is raised when the parser is -unable to find any matching token in the first position of a pattern. -Stream.Error is raised when the first token matches, but the rest do -not. The error recovery in our parser will not be the best and is not -particular user-friendly, but it will be enough for our tutorial. These -exceptions make it easier to handle errors in routines that have various return -types.

- -

With these basic types and exceptions, we can implement the first -piece of our grammar: numeric literals.

- -
- - -

Basic Expression Parsing

- - -
- -

We start with numeric literals, because they are the simplest to process. -For each production in our grammar, we'll define a function which parses that -production. We call this class of expressions "primary" expressions, for -reasons that will become more clear -later in the tutorial. In order to parse an arbitrary primary expression, -we need to determine what sort of expression it is. For numeric literals, we -have:

- -
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-parse_primary = parser
-  (* numberexpr ::= number *)
-  | [< 'Token.Number n >] -> Ast.Number n
-
-
- -

This routine is very simple: it expects to be called when the current token -is a Token.Number token. It takes the current number value, creates -a Ast.Number node, advances the lexer to the next token, and finally -returns.

- -

There are some interesting aspects to this. The most important one is that -this routine eats all of the tokens that correspond to the production and -returns the lexer buffer with the next token (which is not part of the grammar -production) ready to go. This is a fairly standard way to go for recursive -descent parsers. For a better example, the parenthesis operator is defined like -this:

- -
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
-
-
- -

This function illustrates a number of interesting things about the -parser:

- -

-1) It shows how we use the Stream.Error exception. When called, this -function expects that the current token is a '(' token, but after parsing the -subexpression, it is possible that there is no ')' waiting. For example, if -the user types in "(4 x" instead of "(4)", the parser should emit an error. -Because errors can occur, the parser needs a way to indicate that they -happened. In our parser, we use the camlp4 shortcut syntax token ?? "parse -error", where if the token before the ?? does not match, then -Stream.Error "parse error" will be raised.

- -

2) Another interesting aspect of this function is that it uses recursion by -calling Parser.parse_primary (we will soon see that -Parser.parse_primary can call Parser.parse_primary). This is -powerful because it allows us to handle recursive grammars, and keeps each -production very simple. Note that parentheses do not cause construction of AST -nodes themselves. While we could do it this way, the most important role of -parentheses are to guide the parser and provide grouping. Once the parser -constructs the AST, parentheses are not needed.

- -

The next simple production is for handling variable references and function -calls:

- -
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [< 'Token.Ident id; stream >] ->
-      let rec parse_args accumulator = parser
-        | [< e=parse_expr; stream >] ->
-            begin parser
-              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
-              | [< >] -> e :: accumulator
-            end stream
-        | [< >] -> accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [< 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'">] ->
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [< >] -> Ast.Variable id
-      in
-      parse_ident id stream
-
-
- -

This routine follows the same style as the other routines. (It expects to be -called if the current token is a Token.Ident token). It also has -recursion and error handling. One interesting aspect of this is that it uses -look-ahead to determine if the current identifier is a stand alone -variable reference or if it is a function call expression. It handles this by -checking to see if the token after the identifier is a '(' token, constructing -either a Ast.Variable or Ast.Call node as appropriate. -

- -

We finish up by raising an exception if we received a token we didn't -expect:

- -
-
-  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
-
-
- -

Now that basic expressions are handled, we need to handle binary expressions. -They are a bit more complex.

- -
- - -

Binary Expression Parsing

- - -
- -

Binary expressions are significantly harder to parse because they are often -ambiguous. For example, when given the string "x+y*z", the parser can choose -to parse it as either "(x+y)*z" or "x+(y*z)". With common definitions from -mathematics, we expect the later parse, because "*" (multiplication) has -higher precedence than "+" (addition).

- -

There are many ways to handle this, but an elegant and efficient way is to -use Operator-Precedence -Parsing. This parsing technique uses the precedence of binary operators to -guide recursion. To start with, we need a table of precedences:

- -
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
-
-...
-
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-  ...
-
-
- -

For the basic form of Kaleidoscope, we will only support 4 binary operators -(this can obviously be extended by you, our brave and intrepid reader). The -Parser.precedence function returns the precedence for the current -token, or -1 if the token is not a binary operator. Having a Hashtbl.t -makes it easy to add new operators and makes it clear that the algorithm doesn't -depend on the specific operators involved, but it would be easy enough to -eliminate the Hashtbl.t and do the comparisons in the -Parser.precedence function. (Or just use a fixed-size array).

- -

With the helper above defined, we can now start parsing binary expressions. -The basic idea of operator precedence parsing is to break down an expression -with potentially ambiguous binary operators into pieces. Consider ,for example, -the expression "a+b+(c+d)*e*f+g". Operator precedence parsing considers this -as a stream of primary expressions separated by binary operators. As such, -it will first parse the leading primary expression "a", then it will see the -pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g]. Note that because parentheses -are primary expressions, the binary expression parser doesn't need to worry -about nested subexpressions like (c+d) at all. -

- -

-To start, an expression is a primary expression potentially followed by a -sequence of [binop,primaryexpr] pairs:

- -
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
-
-
- -

Parser.parse_bin_rhs is the function that parses the sequence of -pairs for us. It takes a precedence and a pointer to an expression for the part -that has been parsed so far. Note that "x" is a perfectly valid expression: As -such, "binoprhs" is allowed to be empty, in which case it returns the expression -that is passed into it. In our example above, the code passes the expression for -"a" into Parser.parse_bin_rhs and the current token is "+".

- -

The precedence value passed into Parser.parse_bin_rhs indicates the -minimal operator precedence that the function is allowed to eat. For -example, if the current pair stream is [+, x] and Parser.parse_bin_rhs -is passed in a precedence of 40, it will not consume any tokens (because the -precedence of '+' is only 20). With this in mind, Parser.parse_bin_rhs -starts with:

- -
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec < expr_prec then lhs else begin
-
-
- -

This code gets the precedence of the current token and checks to see if if is -too low. Because we defined invalid tokens to have a precedence of -1, this -check implicitly knows that the pair-stream ends when the token stream runs out -of binary operators. If this check succeeds, we know that the token is a binary -operator and that it will be included in this expression:

- -
-
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) ->
-
-
- -

As such, this code eats (and remembers) the binary operator and then parses -the primary expression that follows. This builds up the whole pair, the first of -which is [+, b] for the running example.

- -

Now that we parsed the left-hand side of an expression and one pair of the -RHS sequence, we have to decide which way the expression associates. In -particular, we could have "(a+b) binop unparsed" or "a + (b binop unparsed)". -To determine this, we look ahead at "binop" to determine its precedence and -compare it to BinOp's precedence (which is '+' in this case):

- -
-
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec < next_prec
-
-
- -

If the precedence of the binop to the right of "RHS" is lower or equal to the -precedence of our current operator, then we know that the parentheses associate -as "(a+b) binop ...". In our example, the current operator is "+" and the next -operator is "+", we know that they have the same precedence. In this case we'll -create the AST node for "a+b", and then continue parsing:

- -
-
-          ... if body omitted ...
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-
-
- -

In our example above, this will turn "a+b+" into "(a+b)" and execute the next -iteration of the loop, with "+" as the current token. The code above will eat, -remember, and parse "(c+d)" as the primary expression, which makes the -current pair equal to [+, (c+d)]. It will then evaluate the 'if' conditional above with -"*" as the binop to the right of the primary. In this case, the precedence of "*" is -higher than the precedence of "+" so the if condition will be entered.

- -

The critical question left here is "how can the if condition parse the right -hand side in full"? In particular, to build the AST correctly for our example, -it needs to get all of "(c+d)*e*f" as the RHS expression variable. The code to -do this is surprisingly simple (code from the above two blocks duplicated for -context):

- -
-
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) ->
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              if token_prec < precedence c2
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -> rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-
-
- -

At this point, we know that the binary operator to the RHS of our primary -has higher precedence than the binop we are currently parsing. As such, we know -that any sequence of pairs whose operators are all higher precedence than "+" -should be parsed together and returned as "RHS". To do this, we recursively -invoke the Parser.parse_bin_rhs function specifying "token_prec+1" as -the minimum precedence required for it to continue. In our example above, this -will cause it to return the AST node for "(c+d)*e*f" as RHS, which is then set -as the RHS of the '+' expression.

- -

Finally, on the next iteration of the while loop, the "+g" piece is parsed -and added to the AST. With this little bit of code (14 non-trivial lines), we -correctly handle fully general binary expression parsing in a very elegant way. -This was a whirlwind tour of this code, and it is somewhat subtle. I recommend -running through it with a few tough examples to see how it works. -

- -

This wraps up handling of expressions. At this point, we can point the -parser at an arbitrary token stream and build an expression from it, stopping -at the first token that is not part of the expression. Next up we need to -handle function definitions, etc.

- -
- - -

Parsing the Rest

- - -
- -

-The next thing missing is handling of function prototypes. In Kaleidoscope, -these are used both for 'extern' function declarations as well as function body -definitions. The code to do this is straight-forward and not very interesting -(once you've survived expressions): -

- -
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-
- -

Given this, a function definition is very simple, just a prototype plus -an expression to implement the body:

- -
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
-      Ast.Function (p, e)
-
-
- -

In addition, we support 'extern' to declare functions like 'sin' and 'cos' as -well as to support forward declaration of user functions. These 'extern's are just -prototypes with no body:

- -
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [< 'Token.Extern; e=parse_prototype >] -> e
-
-
- -

Finally, we'll also let the user type in arbitrary top-level expressions and -evaluate them on the fly. We will handle this by defining anonymous nullary -(zero argument) functions for them:

- -
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [< e=parse_expr >] ->
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-
- -

Now that we have all the pieces, let's build a little driver that will let us -actually execute this code we've built!

- -
- - -

The Driver

- - -
- -

The driver for this simply invokes all of the parsing pieces with a top-level -dispatch loop. There isn't much interesting here, so I'll just include the -top-level loop. See below for full code in the "Top-Level -Parsing" section.

- -
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
-  match Stream.peek stream with
-  | None -> ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') ->
-      Stream.junk stream;
-      main_loop stream
-
-  | Some token ->
-      begin
-        try match token with
-        | Token.Def ->
-            ignore(Parser.parse_definition stream);
-            print_endline "parsed a function definition.";
-        | Token.Extern ->
-            ignore(Parser.parse_extern stream);
-            print_endline "parsed an extern.";
-        | _ ->
-            (* Evaluate a top-level expression into an anonymous function. *)
-            ignore(Parser.parse_toplevel stream);
-            print_endline "parsed a top-level expr";
-        with Stream.Error s ->
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready> "; flush stdout;
-      main_loop stream
-
-
- -

The most interesting part of this is that we ignore top-level semicolons. -Why is this, you ask? The basic reason is that if you type "4 + 5" at the -command line, the parser doesn't know whether that is the end of what you will type -or not. For example, on the next line you could type "def foo..." in which case -4+5 is the end of a top-level expression. Alternatively you could type "* 6", -which would continue the expression. Having top-level semicolons allows you to -type "4+5;", and the parser will know you are done.

- -
- - -

Conclusions

- - -
- -

With just under 300 lines of commented code (240 lines of non-comment, -non-blank code), we fully defined our minimal language, including a lexer, -parser, and AST builder. With this done, the executable will validate -Kaleidoscope code and tell us if it is grammatically invalid. For -example, here is a sample interaction:

- -
-
-$ ./toy.byte
-ready> def foo(x y) x+foo(y, 4.0);
-Parsed a function definition.
-ready> def foo(x y) x+y y;
-Parsed a function definition.
-Parsed a top-level expr
-ready> def foo(x y) x+y );
-Parsed a function definition.
-Error: unknown token when expecting an expression
-ready> extern sin(a);
-ready> Parsed an extern
-ready> ^D
-$
-
-
- -

There is a lot of room for extension here. You can define new AST nodes, -extend the language in many ways, etc. In the -next installment, we will describe how to generate LLVM Intermediate -Representation (IR) from the AST.

- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for this and the previous chapter. -Note that it is fully self-contained: you don't need LLVM or any external -libraries at all for this. (Besides the ocaml standard libraries, of -course.) To build this, just compile with:

- -
-
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-
-
- -

Here is the code:

- -
-
_tags:
-
-
-<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
-
-
- -
token.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-
- -
lexer.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [< ' ('0' .. '9' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [< ' ('#'); stream >] ->
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [< 'c; stream >] ->
-      [< 'Token.Kwd c; lex stream >]
-
-  (* end of stream. *)
-  | [< >] -> [< >]
-
-and lex_number buffer = parser
-  | [< ' ('0' .. '9' | '.' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [< stream=lex >] ->
-      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
-
-and lex_ident buffer = parser
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [< stream=lex >] ->
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-and lex_comment = parser
-  | [< ' ('\n'); stream=lex >] -> stream
-  | [< 'c; e=lex_comment >] -> e
-  | [< >] -> [< >]
-
-
- -
ast.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-
-
- -
parser.ml:
-
-
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [< 'Token.Number n >] -> Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [< 'Token.Ident id; stream >] ->
-      let rec parse_args accumulator = parser
-        | [< e=parse_expr; stream >] ->
-            begin parser
-              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
-              | [< >] -> e :: accumulator
-            end stream
-        | [< >] -> accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [< 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'">] ->
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [< >] -> Ast.Variable id
-      in
-      parse_ident id stream
-
-  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec < expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) ->
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec < next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -> rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -> lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [< e=parse_expr >] ->
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [< 'Token.Extern; e=parse_prototype >] -> e
-
-
- -
toplevel.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
-  match Stream.peek stream with
-  | None -> ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') ->
-      Stream.junk stream;
-      main_loop stream
-
-  | Some token ->
-      begin
-        try match token with
-        | Token.Def ->
-            ignore(Parser.parse_definition stream);
-            print_endline "parsed a function definition.";
-        | Token.Extern ->
-            ignore(Parser.parse_extern stream);
-            print_endline "parsed an extern.";
-        | _ ->
-            (* Evaluate a top-level expression into an anonymous function. *)
-            ignore(Parser.parse_toplevel stream);
-            print_endline "parsed a top-level expr";
-        with Stream.Error s ->
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready> "; flush stdout;
-      main_loop stream
-
-
- -
toy.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready> "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop stream;
-;;
-
-main ()
-
-
-
- -Next: Implementing Code Generation to LLVM IR -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner - Erick Tryzelaar
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $ -
- - diff --git a/docs/tutorial/OCamlLangImpl2.rst b/docs/tutorial/OCamlLangImpl2.rst new file mode 100644 index 000000000000..83a22ab22d4f --- /dev/null +++ b/docs/tutorial/OCamlLangImpl2.rst @@ -0,0 +1,896 @@ +=========================================== +Kaleidoscope: Implementing a Parser and AST +=========================================== + +.. contents:: + :local: + +Chapter 2 Introduction +====================== + +Welcome to Chapter 2 of the "`Implementing a language with LLVM in +Objective Caml `_" tutorial. This chapter shows you how to +use the lexer, built in `Chapter 1 `_, to build a +full `parser `_ for our +Kaleidoscope language. Once we have a parser, we'll define and build an +`Abstract Syntax +Tree `_ (AST). + +The parser we will build uses a combination of `Recursive Descent +Parsing `_ and +`Operator-Precedence +Parsing `_ to +parse the Kaleidoscope language (the latter for binary expressions and +the former for everything else). Before we get to parsing though, lets +talk about the output of the parser: the Abstract Syntax Tree. + +The Abstract Syntax Tree (AST) +============================== + +The AST for a program captures its behavior in such a way that it is +easy for later stages of the compiler (e.g. code generation) to +interpret. We basically want one object for each construct in the +language, and the AST should closely model the language. In +Kaleidoscope, we have expressions, a prototype, and a function object. +We'll start with expressions first: + +.. code-block:: ocaml + + (* expr - Base type for all expression nodes. *) + type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + +The code above shows the definition of the base ExprAST class and one +subclass which we use for numeric literals. The important thing to note +about this code is that the Number variant captures the numeric value of +the literal as an instance variable. 
This allows later phases of the +compiler to know what the stored numeric value is. + +Right now we only create the AST, so there are no useful functions on +them. It would be very easy to add a function to pretty print the code, +for example. Here are the other expression AST node definitions that +we'll use in the basic form of the Kaleidoscope language: + +.. code-block:: ocaml + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + +This is all (intentionally) rather straight-forward: variables capture +the variable name, binary operators capture their opcode (e.g. '+'), and +calls capture a function name as well as a list of any argument +expressions. One thing that is nice about our AST is that it captures +the language features without talking about the syntax of the language. +Note that there is no discussion about precedence of binary operators, +lexical structure, etc. + +For our basic language, these are all of the expression nodes we'll +define. Because it doesn't have conditional control flow, it isn't +Turing-complete; we'll fix that in a later installment. The two things +we need next are a way to talk about the interface to a function, and a +way to talk about functions themselves: + +.. code-block:: ocaml + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = Prototype of string * string array + + (* func - This type represents a function definition itself. *) + type func = Function of proto * expr + +In Kaleidoscope, functions are typed with just a count of their +arguments. Since all values are double precision floating point, the +type of each argument doesn't need to be stored anywhere. 
In a more +aggressive and realistic language, the "expr" variants would probably +have a type field. + +With this scaffolding, we can now talk about parsing expressions and +function bodies in Kaleidoscope. + +Parser Basics +============= + +Now that we have an AST to build, we need to define the parser code to +build it. The idea here is that we want to parse something like "x+y" +(which is returned as three tokens by the lexer) into an AST that could +be generated with calls like this: + +.. code-block:: ocaml + + let x = Variable "x" in + let y = Variable "y" in + let result = Binary ('+', x, y) in + ... + +The error handling routines make use of the builtin ``Stream.Failure`` +and ``Stream.Error`` exceptions. ``Stream.Failure`` is raised when the parser is +unable to find any matching token in the first position of a pattern. +``Stream.Error`` is raised when the first token matches, but the rest do +not. The error recovery in our parser will not be the best and is not +particularly user-friendly, but it will be enough for our tutorial. These +exceptions make it easier to handle errors in routines that have various +return types. + +With these basic types and exceptions, we can implement the first piece +of our grammar: numeric literals. + +Basic Expression Parsing +======================== + +We start with numeric literals, because they are the simplest to +process. For each production in our grammar, we'll define a function +which parses that production. We call this class of expressions +"primary" expressions, for reasons that will become more clear `later in +the tutorial `_. In order to parse an +arbitrary primary expression, we need to determine what sort of +expression it is. For numeric literals, we have: + +..
code-block:: ocaml + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) + parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + +This routine is very simple: it expects to be called when the current +token is a ``Token.Number`` token. It takes the current number value, +creates an ``Ast.Number`` node, advances the lexer to the next token, and +finally returns. + +There are some interesting aspects to this. The most important one is +that this routine eats all of the tokens that correspond to the +production and returns the lexer buffer with the next token (which is +not part of the grammar production) ready to go. This is a fairly +standard way to go for recursive descent parsers. For a better example, +the parenthesis operator is defined like this: + +.. code-block:: ocaml + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + +This function illustrates a number of interesting things about the +parser: + +1) It shows how we use the ``Stream.Error`` exception. When called, this +function expects that the current token is a '(' token, but after +parsing the subexpression, it is possible that there is no ')' waiting. +For example, if the user types in "(4 x" instead of "(4)", the parser +should emit an error. Because errors can occur, the parser needs a way +to indicate that they happened. In our parser, we use the camlp4 +shortcut syntax ``token ?? "parse error"``, where if the token before +the ``??`` does not match, then ``Stream.Error "parse error"`` will be +raised. + +2) Another interesting aspect of this function is that it uses recursion +by calling ``Parser.parse_expr`` (we will soon see that +``Parser.parse_expr`` can call ``Parser.parse_primary``). This is +powerful because it allows us to handle recursive grammars, and keeps +each production very simple.
Note that parentheses do not cause +construction of AST nodes themselves. While we could do it this way, the +most important role of parentheses is to guide the parser and provide +grouping. Once the parser constructs the AST, parentheses are not +needed. + +The next simple production is for handling variable references and +function calls: + +.. code-block:: ocaml + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + +This routine follows the same style as the other routines. (It expects +to be called if the current token is a ``Token.Ident`` token). It also +has recursion and error handling. One interesting aspect of this is that +it uses *look-ahead* to determine if the current identifier is a stand +alone variable reference or if it is a function call expression. It +handles this by checking to see if the token after the identifier is a +'(' token, constructing either an ``Ast.Variable`` or ``Ast.Call`` node +as appropriate. + +We finish up by raising an exception if we received a token we didn't +expect: + +.. code-block:: ocaml + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +Now that basic expressions are handled, we need to handle binary +expressions. They are a bit more complex. + +Binary Expression Parsing +========================= + +Binary expressions are significantly harder to parse because they are +often ambiguous.
For example, when given the string "x+y\*z", the parser +can choose to parse it as either "(x+y)\*z" or "x+(y\*z)". With common +definitions from mathematics, we expect the latter parse, because "\*" +(multiplication) has higher *precedence* than "+" (addition). + +There are many ways to handle this, but an elegant and efficient way is +to use `Operator-Precedence +Parsing `_. +This parsing technique uses the precedence of binary operators to guide +recursion. To start with, we need a table of precedences: + +.. code-block:: ocaml + + (* binop_precedence - This holds the precedence for each binary operator that is + * defined *) + let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + + (* precedence - Get the precedence of the pending binary operator token. *) + let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + + ... + + let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + ... + +For the basic form of Kaleidoscope, we will only support 4 binary +operators (this can obviously be extended by you, our brave and intrepid +reader). The ``Parser.precedence`` function returns the precedence for +the current token, or -1 if the token is not a binary operator. Having a +``Hashtbl.t`` makes it easy to add new operators and makes it clear that +the algorithm doesn't depend on the specific operators involved, but it +would be easy enough to eliminate the ``Hashtbl.t`` and do the +comparisons in the ``Parser.precedence`` function. (Or just use a +fixed-size array). + +With the helper above defined, we can now start parsing binary +expressions. The basic idea of operator precedence parsing is to break +down an expression with potentially ambiguous binary operators into +pieces.
Consider, for example, the expression "a+b+(c+d)\*e\*f+g". +Operator precedence parsing considers this as a stream of primary +expressions separated by binary operators. As such, it will first parse +the leading primary expression "a", then it will see the pairs [+, b] +[+, (c+d)] [\*, e] [\*, f] and [+, g]. Note that because parentheses are +primary expressions, the binary expression parser doesn't need to worry +about nested subexpressions like (c+d) at all. + +To start, an expression is a primary expression potentially followed by +a sequence of [binop,primaryexpr] pairs: + +.. code-block:: ocaml + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +``Parser.parse_bin_rhs`` is the function that parses the sequence of +pairs for us. It takes a precedence and the expression for +the part that has been parsed so far. Note that "x" is a perfectly valid +expression: As such, "binoprhs" is allowed to be empty, in which case it +returns the expression that is passed into it. In our example above, the +code passes the expression for "a" into ``Parser.parse_bin_rhs`` and the +current token is "+". + +The precedence value passed into ``Parser.parse_bin_rhs`` indicates the +*minimal operator precedence* that the function is allowed to eat. For +example, if the current pair stream is [+, x] and +``Parser.parse_bin_rhs`` is passed in a precedence of 40, it will not +consume any tokens (because the precedence of '+' is only 20). With this +in mind, ``Parser.parse_bin_rhs`` starts with: + +.. code-block:: ocaml + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence.
*) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + +This code gets the precedence of the current token and checks to see if +it is too low. Because we defined invalid tokens to have a precedence of +-1, this check implicitly knows that the pair-stream ends when the token +stream runs out of binary operators. If this check succeeds, we know +that the token is a binary operator and that it will be included in this +expression: + +.. code-block:: ocaml + + (* Eat the binop. *) + Stream.junk stream; + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + +As such, this code eats (and remembers) the binary operator and then +parses the primary expression that follows. This builds up the whole +pair, the first of which is [+, b] for the running example. + +Now that we parsed the left-hand side of an expression and one pair of +the RHS sequence, we have to decide which way the expression associates. +In particular, we could have "(a+b) binop unparsed" or "a + (b binop +unparsed)". To determine this, we look ahead at "binop" to determine its +precedence and compare it to BinOp's precedence (which is '+' in this +case): + +.. code-block:: ocaml + + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + +If the precedence of the binop to the right of "RHS" is lower or equal +to the precedence of our current operator, then we know that the +parentheses associate as "(a+b) binop ...". In our example, the current +operator is "+" and the next operator is "+", we know that they have the +same precedence.
In this case we'll create the AST node for "a+b", and +then continue parsing: + +.. code-block:: ocaml + + ... if body omitted ... + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + +In our example above, this will turn "a+b+" into "(a+b)" and execute the +next iteration of the loop, with "+" as the current token. The code +above will eat, remember, and parse "(c+d)" as the primary expression, +which makes the current pair equal to [+, (c+d)]. It will then evaluate +the 'if' conditional above with "\*" as the binop to the right of the +primary. In this case, the precedence of "\*" is higher than the +precedence of "+" so the if condition will be entered. + +The critical question left here is "how can the if condition parse the +right hand side in full"? In particular, to build the AST correctly for +our example, it needs to get all of "(c+d)\*e\*f" as the RHS expression +variable. The code to do this is surprisingly simple (code from the +above two blocks duplicated for context): + +.. code-block:: ocaml + + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + if token_prec < precedence c2 + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + +At this point, we know that the binary operator to the RHS of our +primary has higher precedence than the binop we are currently parsing. +As such, we know that any sequence of pairs whose operators are all +higher precedence than "+" should be parsed together and returned as +"RHS". To do this, we recursively invoke the ``Parser.parse_bin_rhs`` +function specifying "token\_prec+1" as the minimum precedence required +for it to continue. 
In our example above, this will cause it to return +the AST node for "(c+d)\*e\*f" as RHS, which is then set as the RHS of +the '+' expression. + +Finally, on the next iteration of the while loop, the "+g" piece is +parsed and added to the AST. With this little bit of code (14 +non-trivial lines), we correctly handle fully general binary expression +parsing in a very elegant way. This was a whirlwind tour of this code, +and it is somewhat subtle. I recommend running through it with a few +tough examples to see how it works. + +This wraps up handling of expressions. At this point, we can point the +parser at an arbitrary token stream and build an expression from it, +stopping at the first token that is not part of the expression. Next up +we need to handle function definitions, etc. + +Parsing the Rest +================ + +The next thing missing is handling of function prototypes. In +Kaleidoscope, these are used both for 'extern' function declarations as +well as function body definitions. The code to do this is +straight-forward and not very interesting (once you've survived +expressions): + +.. code-block:: ocaml + + (* prototype + * ::= id '(' id* ')' *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +Given this, a function definition is very simple, just a prototype plus +an expression to implement the body: + +.. 
code-block:: ocaml + + (* definition ::= 'def' prototype expression *) + let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +In addition, we support 'extern' to declare functions like 'sin' and +'cos' as well as to support forward declaration of user functions. These +'extern's are just prototypes with no body: + +.. code-block:: ocaml + + (* external ::= 'extern' prototype *) + let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e + +Finally, we'll also let the user type in arbitrary top-level expressions +and evaluate them on the fly. We will handle this by defining anonymous +nullary (zero argument) functions for them: + +.. code-block:: ocaml + + (* toplevelexpr ::= expression *) + let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +Now that we have all the pieces, let's build a little driver that will +let us actually *execute* this code we've built! + +The Driver +========== + +The driver for this simply invokes all of the parsing pieces with a +top-level dispatch loop. There isn't much interesting here, so I'll just +include the top-level loop. See `below <#code>`_ for full code in the +"Top-Level Parsing" section. + +.. code-block:: ocaml + + (* top ::= definition | external | expression | ';' *) + let rec main_loop stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop stream + + | Some token -> + begin + try match token with + | Token.Def -> + ignore(Parser.parse_definition stream); + print_endline "parsed a function definition."; + | Token.Extern -> + ignore(Parser.parse_extern stream); + print_endline "parsed an extern."; + | _ -> + (* Evaluate a top-level expression into an anonymous function. 
*) + ignore(Parser.parse_toplevel stream); + print_endline "parsed a top-level expr"; + with Stream.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop stream + +The most interesting part of this is that we ignore top-level +semicolons. Why is this, you ask? The basic reason is that if you type +"4 + 5" at the command line, the parser doesn't know whether that is the +end of what you will type or not. For example, on the next line you +could type "def foo..." in which case 4+5 is the end of a top-level +expression. Alternatively you could type "\* 6", which would continue +the expression. Having top-level semicolons allows you to type "4+5;", +and the parser will know you are done. + +Conclusions +=========== + +With just under 300 lines of commented code (240 lines of non-comment, +non-blank code), we fully defined our minimal language, including a +lexer, parser, and AST builder. With this done, the executable will +validate Kaleidoscope code and tell us if it is grammatically invalid. +For example, here is a sample interaction: + +.. code-block:: bash + + $ ./toy.byte + ready> def foo(x y) x+foo(y, 4.0); + Parsed a function definition. + ready> def foo(x y) x+y y; + Parsed a function definition. + Parsed a top-level expr + ready> def foo(x y) x+y ); + Parsed a function definition. + Error: unknown token when expecting an expression + ready> extern sin(a); + ready> Parsed an extern + ready> ^D + $ + +There is a lot of room for extension here. You can define new AST nodes, +extend the language in many ways, etc. In the `next +installment `_, we will describe how to generate +LLVM Intermediate Representation (IR) from the AST. + +Full Code Listing +================= + +Here is the complete code listing for this and the previous chapter. +Note that it is fully self-contained: you don't need LLVM or any +external libraries at all for this. 
(Besides the ocaml standard +libraries, of course.) To build this, just compile with: + +.. code-block:: bash + + # Compile + ocamlbuild toy.byte + # Run + ./toy.byte + +Here is the code: + +\_tags: + :: + + <{lexer,parser}.ml>: use_camlp4, pp(camlp4of) + +token.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + + (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) + type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + +lexer.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + + let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + + and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + + and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + + and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] + +ast.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + + (* expr - Base type for all expression nodes. *) + type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = Prototype of string * string array + + (* func - This type represents a function definition itself. *) + type func = Function of proto * expr + +parser.ml: + .. code-block:: ocaml + + (*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + + (* binop_precedence - This holds the precedence for each binary operator that is + * defined *) + let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + + (* precedence - Get the precedence of the pending binary operator token. 
*) + let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) + let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. 
*) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + + (* prototype + * ::= id '(' id* ')' *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + + (* definition ::= 'def' prototype expression *) + let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + + (* toplevelexpr ::= expression *) + let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + + (* external ::= 'extern' prototype *) + let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e + +toplevel.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + + (* top ::= definition | external | expression | ';' *) + let rec main_loop stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. 
*) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop stream + + | Some token -> + begin + try match token with + | Token.Def -> + ignore(Parser.parse_definition stream); + print_endline "parsed a function definition."; + | Token.Extern -> + ignore(Parser.parse_extern stream); + print_endline "parsed an extern."; + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + ignore(Parser.parse_toplevel stream); + print_endline "parsed a top-level expr"; + with Stream.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop stream + +toy.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + + let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop stream; + ;; + + main () + +`Next: Implementing Code Generation to LLVM IR `_ + diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html deleted file mode 100644 index e6105e85f4d2..000000000000 --- a/docs/tutorial/OCamlLangImpl3.html +++ /dev/null @@ -1,1093 +0,0 @@ - - - - - Kaleidoscope: Implementing code generation to LLVM IR - - - - - - - - -

Kaleidoscope: Code generation to LLVM IR

- - - -
-

- Written by Chris Lattner - and Erick Tryzelaar -

-
- - -

Chapter 3 Introduction

- - -
- -

Welcome to Chapter 3 of the "Implementing a language -with LLVM" tutorial. This chapter shows you how to transform the Abstract Syntax Tree, built in Chapter 2, into -LLVM IR. This will teach you a little bit about how LLVM does things, as well -as demonstrate how easy it is to use. It's much more work to build a lexer and -parser than it is to generate LLVM IR code. :) -

- -

Please note: the code in this chapter and later chapters requires LLVM 2.3 or -LLVM SVN to work. LLVM 2.2 and before will not work with it.

- -
- - -

Code Generation Setup

- - -
- -

-In order to generate LLVM IR, we want some simple setup to get started. First -we define virtual code generation (codegen) methods in each AST class:

- -
-
-let rec codegen_expr = function
-  | Ast.Number n -> ...
-  | Ast.Variable name -> ...
-
-
- -

The Codegen.codegen_expr function says to emit IR for that AST node -along with all the things it depends on, and they all return an LLVM Value -object. "Value" is the class used to represent a "Static Single -Assignment (SSA) register" or "SSA value" in LLVM. The most distinct aspect -of SSA values is that their value is computed as the related instruction -executes, and it does not get a new value until (and if) the instruction -re-executes. In other words, there is no way to "change" an SSA value. For -more information, please read up on Static Single -Assignment - the concepts are really quite natural once you grok them.

- -

The -second thing we want is an "Error" exception like we used for the parser, which -will be used to report errors found during code generation (for example, use of -an undeclared parameter):

- -
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-
- -

The static variables will be used during code generation. -Codegen.the_module is the LLVM construct that contains all of the -functions and global variables in a chunk of code. In many ways, it is the -top-level structure that the LLVM IR uses to contain code.

- -

The Codegen.builder object is a helper object that makes it easy to -generate LLVM instructions. Instances of the IRBuilder -class keep track of the current place to insert instructions and have methods to -create new instructions.

- -

The Codegen.named_values map keeps track of which values are defined -in the current scope and what their LLVM representation is. (In other words, it -is a symbol table for the code). In this form of Kaleidoscope, the only things -that can be referenced are function parameters. As such, function parameters -will be in this map when generating code for their function body.

- -

-With these basics in place, we can start talking about how to generate code for -each expression. Note that this assumes that the Codegen.builder has -been set up to generate code into something. For now, we'll assume -that this has already been done, and we'll just use it to emit code.

- -
- - -

Expression Code Generation

- - -
- -

Generating LLVM code for expression nodes is very straightforward: less -than 30 lines of commented code for all four of our expression nodes. First -we'll do numeric literals:

- -
-
-  | Ast.Number n -> const_float double_type n
-
-
- -

In the LLVM IR, numeric constants are represented with the -ConstantFP class, which holds the numeric value in an APFloat -internally (APFloat has the capability of holding floating point -constants of Arbitrary Precision). This code basically just -creates and returns a ConstantFP. Note that in the LLVM IR, -constants are all uniqued together and shared. For this reason, the API -uses the "foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".

- -
-
-  | Ast.Variable name ->
-      (try Hashtbl.find named_values name with
-        | Not_found -> raise (Error "unknown variable name"))
-
-
- -

References to variables are also quite simple using LLVM. In the simple -version of Kaleidoscope, we assume that the variable has already been emitted -somewhere and its value is available. In practice, the only values that can be -in the Codegen.named_values map are function arguments. This code -simply checks to see that the specified name is in the map (if not, an unknown -variable is being referenced) and returns the value for it. In future chapters, -we'll add support for loop induction variables -in the symbol table, and for local -variables.

- -
-
-  | Ast.Binary (op, lhs, rhs) ->
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
-        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
-        | '<' ->
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -> raise (Error "invalid binary operator")
-      end
-
-
- -

Binary operators start to get more interesting. The basic idea here is that -we recursively emit code for the left-hand side of the expression, then the -right-hand side, then we compute the result of the binary expression. In this -code, we do a simple switch on the opcode to create the right LLVM instruction. -

- -

In the example above, the LLVM builder class is starting to show its value. -IRBuilder knows where to insert the newly created instruction; all you have to -do is specify what instruction to create (e.g. with Llvm.build_fadd), -which operands to use (lhs and rhs here) and optionally -provide a name for the generated instruction.

- -

One nice thing about LLVM is that the name is just a hint. For instance, if -the code above emits multiple "addtmp" variables, LLVM will automatically -provide each one with an increasing, unique numeric suffix. Local value names -for instructions are purely optional, but it makes it much easier to read the -IR dumps.

- -

LLVM instructions are constrained by -strict rules: for example, the Left and Right operands of -an add instruction must have the same -type, and the result type of the add must match the operand types. Because -all values in Kaleidoscope are doubles, this makes for very simple code for add, -sub and mul.

- -

On the other hand, LLVM specifies that the fcmp instruction always returns an 'i1' value -(a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with -a uitofp instruction. This instruction -converts its input integer into a floating point value by treating the input -as an unsigned value. In contrast, if we used the sitofp instruction, the Kaleidoscope '<' -operator would return 0.0 and -1.0, depending on the input value.

- -
-
-  | Ast.Call (callee, args) ->
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-
-
- -

Code generation for function calls is quite straightforward with LLVM. The -code above initially does a function name lookup in the LLVM Module's symbol -table. Recall that the LLVM Module is the container that holds all of the -functions we are JIT'ing. By giving each function the same name as what the -user specifies, we can use the LLVM symbol table to resolve function names for -us.

- -

Once we have the function to call, we recursively codegen each argument that -is to be passed in, and create an LLVM call -instruction. Note that LLVM uses the native C calling conventions by -default, allowing these calls to also call into standard library functions like -"sin" and "cos", with no additional effort.

- -

This wraps up our handling of the four basic expressions that we have so far -in Kaleidoscope. Feel free to go in and add some more. For example, by -browsing the LLVM language reference you'll find -several other interesting instructions that are really easy to plug into our -basic framework.

- -
- - -

Function Code Generation

- - -
- -

Code generation for prototypes and functions must handle a number of -details, which make their code less beautiful than expression code -generation, but allow us to illustrate some important points. First, let's -talk about code generation for prototypes: they are used both for function -bodies and external function declarations. The code starts with:

- -
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) ->
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-
-
- -

This code packs a lot of power into a few lines. Note first that this -function returns a "Function*" instead of a "Value*" (although at the moment -they both are modeled by llvalue in ocaml). Because a "prototype" -really talks about the external interface for a function (not the value computed -by an expression), it makes sense for it to return the LLVM Function it -corresponds to when codegen'd.

- -

The call to Llvm.function_type creates the Llvm.lltype -that should be used for a given Prototype. Since all function arguments in -Kaleidoscope are of type double, the first line creates a vector of "N" LLVM -double types. It then uses the Llvm.function_type method to create a -function type that takes "N" doubles as arguments, returns one double as a -result, and that is not vararg (vararg function types are created with -Llvm.var_arg_function_type instead). Note that Types in LLVM are uniqued just -like Constants are, so you don't "new" a type, you "get" it.

- -

The final line above checks if the function has already been defined in -Codegen.the_module. If not, we will create it.

- -
-
-        | None -> declare_function name ft the_module
-
-
- -

This indicates the type and name to use, as well as which module to insert -into. By default we assume a function has -Llvm.Linkage.ExternalLinkage. "external -linkage" means that the function may be defined outside the current module -and/or that it is callable by functions outside the module. The "name" -passed in is the name the user specified: this name is registered in -"Codegen.the_module"s symbol table, which is used by the function call -code above.

- -

In Kaleidoscope, I choose to allow redefinitions of functions in two cases: -first, we want to allow 'extern'ing a function more than once, as long as the -prototypes for the externs match (since all arguments have the same type, we -just have to check that the number of arguments match). Second, we want to -allow 'extern'ing a function and then defining a body for it. This is useful -when defining mutually recursive functions.

- -
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f ->
-            (* If 'f' already has a body, reject this. *)
-            if Array.length (basic_blocks f) == 0 then () else
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if Array.length (params f) == Array.length args then () else
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-
- -

In order to verify the logic above, we first check to see if the pre-existing -function is "empty". In this case, empty means that it has no basic blocks in -it, which means it has no body. If it has no body, it is a forward -declaration. Since we don't allow anything after a full definition of the -function, the code rejects a pre-existing function that already has a body. If the previous reference to a function -was an 'extern', we simply verify that the number of arguments for that -definition and this one match up. If not, we emit an error.

- -
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a ->
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-
- -

The last bit of code for prototypes loops over all of the arguments in the -function, setting the name of the LLVM Argument objects to match, and registering -the arguments in the Codegen.named_values map for future use by the -Ast.Variable variant. Once this is set up, it returns the Function -object to the caller. Note that we don't check for conflicting -argument names here (e.g. "extern foo(a b a)"). Doing so would be very -straight-forward with the mechanics we have already used above.

- -
-
-let codegen_func = function
-  | Ast.Function (proto, body) ->
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-
- -

Code generation for function definitions starts out simply enough: we first -clear out the Codegen.named_values map to make sure that there isn't anything in it -from the last function we compiled, and then codegen the prototype (Proto) and -verify that it is ok. Code generation of the prototype ensures -that there is an LLVM Function object that is ready to go for us.

- -
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-
- -

Now we get to the point where the Codegen.builder is set up. The -first line creates a new -basic block (named -"entry"), which is inserted into the_function. The second line then -tells the builder that new instructions should be inserted into the end of the -new basic block. Basic blocks in LLVM are an important part of functions that -define the Control Flow Graph. -Since we don't have any control flow, our functions will only contain one -block at this point. We'll fix this in Chapter -5 :).

- -
-
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        the_function
-
-
- -

Once the insertion point is set up, we call the Codegen.codegen_expr -function for the root expression of the function. If no error happens, this emits -code to compute the expression into the entry block and returns the value that -was computed. Assuming no error, we then create an LLVM ret instruction, which completes the function. -Once the function is built, we call -Llvm_analysis.assert_valid_function, which is provided by LLVM. This -function does a variety of consistency checks on the generated code, to -determine if our compiler is doing everything right. Using this is important: -it can catch a lot of bugs. Once the function is finished and validated, we -return it.

- -
-
-      with e ->
-        delete_function the_function;
-        raise e
-
-
- -

The only piece left here is handling of the error case. For simplicity, we -handle this by merely deleting the function we produced with the -Llvm.delete_function method. This allows the user to redefine a -function that they incorrectly typed in before: if we didn't delete it, it -would live in the symbol table, with a body, preventing future redefinition.

- -

This code does have a bug, though. Since the Codegen.codegen_proto -can return a previously defined forward declaration, our code can actually delete -a forward declaration. There are a number of ways to fix this bug, see what you -can come up with! Here is a testcase:

- -
-
-extern foo(a b);     # ok, defines foo.
-def foo(a b) c;      # error, 'c' is invalid.
-def bar() foo(1, 2); # error, unknown function "foo"
-
-
- -
- - -

Driver Changes and Closing Thoughts

- - -
- -

-For now, code generation to LLVM doesn't really get us much, except that we can -look at the pretty IR calls. The sample code inserts calls to Codegen into the -"Toplevel.main_loop", and then dumps out the LLVM IR. This gives a -nice way to look at the LLVM IR for simple functions. For example: -

- -
-
-ready> 4+5;
-Read top-level expression:
-define double @""() {
-entry:
-        %addtmp = fadd double 4.000000e+00, 5.000000e+00
-        ret double %addtmp
-}
-
-
- -

Note how the parser turns the top-level expression into anonymous functions -for us. This will be handy when we add JIT -support in the next chapter. Also note that the code is very literally -transcribed, no optimizations are being performed. We will -add optimizations explicitly -in the next chapter.

- -
-
-ready> def foo(a b) a*a + 2*a*b + b*b;
-Read function definition:
-define double @foo(double %a, double %b) {
-entry:
-        %multmp = fmul double %a, %a
-        %multmp1 = fmul double 2.000000e+00, %a
-        %multmp2 = fmul double %multmp1, %b
-        %addtmp = fadd double %multmp, %multmp2
-        %multmp3 = fmul double %b, %b
-        %addtmp4 = fadd double %addtmp, %multmp3
-        ret double %addtmp4
-}
-
-
- -

This shows some simple arithmetic. Notice the striking similarity to the -LLVM builder calls that we use to create the instructions.

- -
-
-ready> def bar(a) foo(a, 4.0) + bar(31337);
-Read function definition:
-define double @bar(double %a) {
-entry:
-        %calltmp = call double @foo(double %a, double 4.000000e+00)
-        %calltmp1 = call double @bar(double 3.133700e+04)
-        %addtmp = fadd double %calltmp, %calltmp1
-        ret double %addtmp
-}
-
-
- -

This shows some function calls. Note that this function will take a long -time to execute if you call it. In the future we'll add conditional control -flow to actually make recursion useful :).

- -
-
-ready> extern cos(x);
-Read extern:
-declare double @cos(double)
-
-ready> cos(1.234);
-Read top-level expression:
-define double @""() {
-entry:
-        %calltmp = call double @cos(double 1.234000e+00)
-        ret double %calltmp
-}
-
-
- -

This shows an extern for the libm "cos" function, and a call to it.

- - -
-
-ready> ^D
-; ModuleID = 'my cool jit'
-
-define double @""() {
-entry:
-        %addtmp = fadd double 4.000000e+00, 5.000000e+00
-        ret double %addtmp
-}
-
-define double @foo(double %a, double %b) {
-entry:
-        %multmp = fmul double %a, %a
-        %multmp1 = fmul double 2.000000e+00, %a
-        %multmp2 = fmul double %multmp1, %b
-        %addtmp = fadd double %multmp, %multmp2
-        %multmp3 = fmul double %b, %b
-        %addtmp4 = fadd double %addtmp, %multmp3
-        ret double %addtmp4
-}
-
-define double @bar(double %a) {
-entry:
-        %calltmp = call double @foo(double %a, double 4.000000e+00)
-        %calltmp1 = call double @bar(double 3.133700e+04)
-        %addtmp = fadd double %calltmp, %calltmp1
-        ret double %addtmp
-}
-
-declare double @cos(double)
-
-define double @""() {
-entry:
-        %calltmp = call double @cos(double 1.234000e+00)
-        ret double %calltmp
-}
-
-
- -

When you quit the current demo, it dumps out the IR for the entire module -generated. Here you can see the big picture with all the functions referencing -each other.

- -

This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll -describe how to add JIT codegen and optimizer -support to this so we can actually start running code!

- -
- - - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with the -LLVM code generator. Because this uses the LLVM libraries, we need to link -them in. To do this, we use the llvm-config tool to inform -our makefile/command line about which options to use:

- -
-
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-
-
- -

Here is the code:

- -
-
_tags:
-
-
-<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
-<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
-
-
- -
myocamlbuild.ml:
-
-
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-
-
- -
token.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-
- -
lexer.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [< ' ('0' .. '9' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [< ' ('#'); stream >] ->
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [< 'c; stream >] ->
-      [< 'Token.Kwd c; lex stream >]
-
-  (* end of stream. *)
-  | [< >] -> [< >]
-
-and lex_number buffer = parser
-  | [< ' ('0' .. '9' | '.' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [< stream=lex >] ->
-      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
-
-and lex_ident buffer = parser
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [< stream=lex >] ->
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-and lex_comment = parser
-  | [< ' ('\n'); stream=lex >] -> stream
-  | [< 'c; e=lex_comment >] -> e
-  | [< >] -> [< >]
-
-
- -
ast.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-
-
- -
parser.ml:
-
-
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [< 'Token.Number n >] -> Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [< 'Token.Ident id; stream >] ->
-      let rec parse_args accumulator = parser
-        | [< e=parse_expr; stream >] ->
-            begin parser
-              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
-              | [< >] -> e :: accumulator
-            end stream
-        | [< >] -> accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [< 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'">] ->
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [< >] -> Ast.Variable id
-      in
-      parse_ident id stream
-
-  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec < expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) ->
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec < next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -> rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -> lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [< e=parse_expr >] ->
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [< 'Token.Extern; e=parse_prototype >] -> e
-
-
- -
codegen.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
-  | Ast.Number n -> const_float double_type n
-  | Ast.Variable name ->
-      (try Hashtbl.find named_values name with
-        | Not_found -> raise (Error "unknown variable name"))
-  | Ast.Binary (op, lhs, rhs) ->
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
-        | '<' ->
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -> raise (Error "invalid binary operator")
-      end
-  | Ast.Call (callee, args) ->
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) ->
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -> declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f ->
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f <> At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) <> ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a ->
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-let codegen_func = function
-  | Ast.Function (proto, body) ->
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        the_function
-      with e ->
-        delete_function the_function;
-        raise e
-
-
- -
toplevel.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
-  match Stream.peek stream with
-  | None -> ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') ->
-      Stream.junk stream;
-      main_loop stream
-
-  | Some token ->
-      begin
-        try match token with
-        | Token.Def ->
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func e);
-        | Token.Extern ->
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ ->
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            dump_value (Codegen.codegen_func e);
-        with Stream.Error s | Codegen.Error s ->
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready> "; flush stdout;
-      main_loop stream
-
-
- -
toy.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready> "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-
-
-
- -Next: Adding JIT and Optimizer Support -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- Erick Tryzelaar
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $ -
- - diff --git a/docs/tutorial/OCamlLangImpl3.rst b/docs/tutorial/OCamlLangImpl3.rst new file mode 100644 index 000000000000..fd9f0e5cd3f4 --- /dev/null +++ b/docs/tutorial/OCamlLangImpl3.rst @@ -0,0 +1,961 @@ +======================================== +Kaleidoscope: Code generation to LLVM IR +======================================== + +.. contents:: + :local: + +Chapter 3 Introduction +====================== + +Welcome to Chapter 3 of the "`Implementing a language with +LLVM `_" tutorial. This chapter shows you how to transform +the `Abstract Syntax Tree `_, built in Chapter 2, +into LLVM IR. This will teach you a little bit about how LLVM does +things, as well as demonstrate how easy it is to use. It's much more +work to build a lexer and parser than it is to generate LLVM IR code. :) + +**Please note**: the code in this chapter and later require LLVM 2.3 or +LLVM SVN to work. LLVM 2.2 and before will not work with it. + +Code Generation Setup +===================== + +In order to generate LLVM IR, we want some simple setup to get started. +First we define virtual code generation (codegen) methods in each AST +class: + +.. code-block:: ocaml + + let rec codegen_expr = function + | Ast.Number n -> ... + | Ast.Variable name -> ... + +The ``Codegen.codegen_expr`` function says to emit IR for that AST node +along with all the things it depends on, and they all return an LLVM +Value object. "Value" is the class used to represent a "`Static Single +Assignment +(SSA) `_ +register" or "SSA value" in LLVM. The most distinct aspect of SSA values +is that their value is computed as the related instruction executes, and +it does not get a new value until (and if) the instruction re-executes. +In other words, there is no way to "change" an SSA value. For more +information, please read up on `Static Single +Assignment `_ +- the concepts are really quite natural once you grok them. 
+ +The second thing we want is an "Error" exception like we used for the +parser, which will be used to report errors found during code generation +(for example, use of an undeclared parameter): + +.. code-block:: ocaml + + exception Error of string + + let context = global_context () + let the_module = create_module context "my cool jit" + let builder = builder context + let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 + let double_type = double_type context + +The static variables will be used during code generation. +``Codegen.the_module`` is the LLVM construct that contains all of the +functions and global variables in a chunk of code. In many ways, it is +the top-level structure that the LLVM IR uses to contain code. + +The ``Codegen.builder`` object is a helper object that makes it easy to +generate LLVM instructions. Instances of the +```IRBuilder`` `_ +class keep track of the current place to insert instructions and have +methods to create new instructions. + +The ``Codegen.named_values`` map keeps track of which values are defined +in the current scope and what their LLVM representation is. (In other +words, it is a symbol table for the code). In this form of Kaleidoscope, +the only things that can be referenced are function parameters. As such, +function parameters will be in this map when generating code for their +function body. + +With these basics in place, we can start talking about how to generate +code for each expression. Note that this assumes that the +``Codegen.builder`` has been set up to generate code *into* something. +For now, we'll assume that this has already been done, and we'll just +use it to emit code. + +Expression Code Generation +========================== + +Generating LLVM code for expression nodes is very straightforward: less +than 30 lines of commented code for all four of our expression nodes. +First we'll do numeric literals: + +.. 
code-block:: ocaml + + | Ast.Number n -> const_float double_type n + +In the LLVM IR, numeric constants are represented with the +``ConstantFP`` class, which holds the numeric value in an ``APFloat`` +internally (``APFloat`` has the capability of holding floating point +constants of Arbitrary Precision). This code basically just creates +and returns a ``ConstantFP``. Note that in the LLVM IR that constants +are all uniqued together and shared. For this reason, the API uses "the +foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)". + +.. code-block:: ocaml + + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + +References to variables are also quite simple using LLVM. In the simple +version of Kaleidoscope, we assume that the variable has already been +emitted somewhere and its value is available. In practice, the only +values that can be in the ``Codegen.named_values`` map are function +arguments. This code simply checks to see that the specified name is in +the map (if not, an unknown variable is being referenced) and returns +the value for it. In future chapters, we'll add support for `loop +induction variables `_ in the symbol table, and for +`local variables `_. + +.. code-block:: ocaml + + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + +Binary operators start to get more interesting. 
The basic idea here is +that we recursively emit code for the left-hand side of the expression, +then the right-hand side, then we compute the result of the binary +expression. In this code, we do a simple switch on the opcode to create +the right LLVM instruction. + +In the example above, the LLVM builder class is starting to show its +value. IRBuilder knows where to insert the newly created instruction, +all you have to do is specify what instruction to create (e.g. with +``Llvm.build_fadd``), which operands to use (``lhs_val`` and ``rhs_val`` here) +and optionally provide a name for the generated instruction. + +One nice thing about LLVM is that the name is just a hint. For instance, +if the code above emits multiple "addtmp" variables, LLVM will +automatically provide each one with an increasing, unique numeric +suffix. Local value names for instructions are purely optional, but it +makes it much easier to read the IR dumps. + +`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict +rules: for example, the Left and Right operands of an `add +instruction <../LangRef.html#i_add>`_ must have the same type, and the +result type of the add must match the operand types. Because all values +in Kaleidoscope are doubles, this makes for very simple code for add, +sub and mul. + +On the other hand, LLVM specifies that the `fcmp +instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a +one bit integer). The problem with this is that Kaleidoscope wants the +value to be a 0.0 or 1.0 value. In order to get these semantics, we +combine the fcmp instruction with a `uitofp +instruction <../LangRef.html#i_uitofp>`_. This instruction converts its +input integer into a floating point value by treating the input as an +unsigned value. In contrast, if we used the `sitofp +instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator +would return 0.0 and -1.0, depending on the input value. + +.. 
code-block:: ocaml + + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + +Code generation for function calls is quite straightforward with LLVM. +The code above initially does a function name lookup in the LLVM +Module's symbol table. Recall that the LLVM Module is the container that +holds all of the functions we are JIT'ing. By giving each function the +same name as what the user specifies, we can use the LLVM symbol table +to resolve function names for us. + +Once we have the function to call, we recursively codegen each argument +that is to be passed in, and create an LLVM `call +instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C +calling conventions by default, allowing these calls to also call into +standard library functions like "sin" and "cos", with no additional +effort. + +This wraps up our handling of the four basic expressions that we have so +far in Kaleidoscope. Feel free to go in and add some more. For example, +by browsing the `LLVM language reference <../LangRef.html>`_ you'll find +several other interesting instructions that are really easy to plug into +our basic framework. + +Function Code Generation +======================== + +Code generation for prototypes and functions must handle a number of +details, which make their code less beautiful than expression code +generation, but allows us to illustrate some important points. First, +lets talk about code generation for prototypes: they are used both for +function bodies and external function declarations. The code starts +with: + +.. 
code-block:: ocaml + + let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + +This code packs a lot of power into a few lines. Note first that this +function returns a "Function\*" instead of a "Value\*" (although at the +moment they both are modeled by ``llvalue`` in ocaml). Because a +"prototype" really talks about the external interface for a function +(not the value computed by an expression), it makes sense for it to +return the LLVM Function it corresponds to when codegen'd. + +The call to ``Llvm.function_type`` creates the ``Llvm.lltype`` that +should be used for a given Prototype. Since all function arguments in +Kaleidoscope are of type double, the first line creates a vector of "N" +LLVM double types. It then uses the ``Llvm.function_type`` method to +create a function type that takes "N" doubles as arguments, returns one +double as a result, and that is not vararg (that uses the function +``Llvm.var_arg_function_type``). Note that Types in LLVM are uniqued +just like ``Constant``'s are, so you don't "new" a type, you "get" it. + +The final line above checks if the function has already been defined in +``Codegen.the_module``. If not, we will create it. + +.. code-block:: ocaml + + | None -> declare_function name ft the_module + +This indicates the type and name to use, as well as which module to +insert into. By default we assume a function has +``Llvm.Linkage.ExternalLinkage``. "`external +linkage `_" means that the function may be defined +outside the current module and/or that it is callable by functions +outside the module. The "``name``" passed in is the name the user +specified: this name is registered in "``Codegen.the_module``"s symbol +table, which is used by the function call code above. 
+ +In Kaleidoscope, I choose to allow redefinitions of functions in two +cases: first, we want to allow 'extern'ing a function more than once, as +long as the prototypes for the externs match (since all arguments have +the same type, we just have to check that the number of arguments +match). Second, we want to allow 'extern'ing a function and then +defining a body for it. This is useful when defining mutually recursive +functions. + +.. code-block:: ocaml + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if Array.length (basic_blocks f) == 0 then () else + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if Array.length (params f) == Array.length args then () else + raise (Error "redefinition of function with different # args"); + f + in + +In order to verify the logic above, we first check to see if the +pre-existing function is "empty". In this case, empty means that it has +no basic blocks in it, which means it has no body. If it has no body, it +is a forward declaration. Since we don't allow anything after a full +definition of the function, the code rejects this case. If the previous +reference to a function was an 'extern', we simply verify that the +number of arguments for that definition and this one match up. If not, +we emit an error. + +.. code-block:: ocaml + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +The last bit of code for prototypes loops over all of the arguments in +the function, setting the name of the LLVM Argument objects to match, +and registering the arguments in the ``Codegen.named_values`` map for +future use by the ``Ast.Variable`` variant. Once this is set up, it +returns the Function object to the caller. 
Note that we don't check for +conflicting argument names here (e.g. "extern foo(a b a)"). Doing so +would be very straight-forward with the mechanics we have already used +above. + +.. code-block:: ocaml + + let codegen_func = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + +Code generation for function definitions starts out simply enough: we +first clear out the ``Codegen.named_values`` map to make sure that there +isn't anything in it from the last function we compiled. We then codegen +the prototype (Proto) and verify that it is ok. Code generation of +the prototype ensures that there is an LLVM Function object that is +ready to go for us. + +.. code-block:: ocaml + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + +Now we get to the point where the ``Codegen.builder`` is set up. The +first line creates a new `basic +block `_ (named "entry"), +which is inserted into ``the_function``. The second line then tells the +builder that new instructions should be inserted into the end of the new +basic block. Basic blocks in LLVM are an important part of functions +that define the `Control Flow +Graph `_. Since we +don't have any control flow, our functions will only contain one block +at this point. We'll fix this in `Chapter 5 `_ :). + +.. code-block:: ocaml + + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + the_function + +Once the insertion point is set up, we call the ``Codegen.codegen_expr`` +method for the root expression of the function. If no error happens, +this emits code to compute the expression into the entry block and +returns the value that was computed. 
Assuming no error, we then create +an LLVM `ret instruction <../LangRef.html#i_ret>`_, which completes the +function. Once the function is built, we call +``Llvm_analysis.assert_valid_function``, which is provided by LLVM. This +function does a variety of consistency checks on the generated code, to +determine if our compiler is doing everything right. Using this is +important: it can catch a lot of bugs. Once the function is finished and +validated, we return it. + +.. code-block:: ocaml + + with e -> + delete_function the_function; + raise e + +The only piece left here is handling of the error case. For simplicity, +we handle this by merely deleting the function we produced with the +``Llvm.delete_function`` method. This allows the user to redefine a +function that they incorrectly typed in before: if we didn't delete it, +it would live in the symbol table, with a body, preventing future +redefinition. + +This code does have a bug, though. Since the ``Codegen.codegen_proto`` +can return a previously defined forward declaration, our code can +actually delete a forward declaration. There are a number of ways to fix +this bug, see what you can come up with! Here is a testcase: + +:: + + extern foo(a b); # ok, defines foo. + def foo(a b) c; # error, 'c' is invalid. + def bar() foo(1, 2); # error, unknown function "foo" + +Driver Changes and Closing Thoughts +=================================== + +For now, code generation to LLVM doesn't really get us much, except that +we can look at the pretty IR calls. The sample code inserts calls to +Codegen into the "``Toplevel.main_loop``", and then dumps out the LLVM +IR. This gives a nice way to look at the LLVM IR for simple functions. +For example: + +:: + + ready> 4+5; + Read top-level expression: + define double @""() { + entry: + %addtmp = fadd double 4.000000e+00, 5.000000e+00 + ret double %addtmp + } + +Note how the parser turns the top-level expression into anonymous +functions for us. 
This will be handy when we add `JIT +support `_ in the next chapter. Also note that +the code is very literally transcribed, no optimizations are being +performed. We will `add +optimizations `_ explicitly in the +next chapter. + +:: + + ready> def foo(a b) a*a + 2*a*b + b*b; + Read function definition: + define double @foo(double %a, double %b) { + entry: + %multmp = fmul double %a, %a + %multmp1 = fmul double 2.000000e+00, %a + %multmp2 = fmul double %multmp1, %b + %addtmp = fadd double %multmp, %multmp2 + %multmp3 = fmul double %b, %b + %addtmp4 = fadd double %addtmp, %multmp3 + ret double %addtmp4 + } + +This shows some simple arithmetic. Notice the striking similarity to the +LLVM builder calls that we use to create the instructions. + +:: + + ready> def bar(a) foo(a, 4.0) + bar(31337); + Read function definition: + define double @bar(double %a) { + entry: + %calltmp = call double @foo(double %a, double 4.000000e+00) + %calltmp1 = call double @bar(double 3.133700e+04) + %addtmp = fadd double %calltmp, %calltmp1 + ret double %addtmp + } + +This shows some function calls. Note that this function will take a long +time to execute if you call it. In the future we'll add conditional +control flow to actually make recursion useful :). + +:: + + ready> extern cos(x); + Read extern: + declare double @cos(double) + + ready> cos(1.234); + Read top-level expression: + define double @""() { + entry: + %calltmp = call double @cos(double 1.234000e+00) + ret double %calltmp + } + +This shows an extern for the libm "cos" function, and a call to it. 
+ +:: + + ready> ^D + ; ModuleID = 'my cool jit' + + define double @""() { + entry: + %addtmp = fadd double 4.000000e+00, 5.000000e+00 + ret double %addtmp + } + + define double @foo(double %a, double %b) { + entry: + %multmp = fmul double %a, %a + %multmp1 = fmul double 2.000000e+00, %a + %multmp2 = fmul double %multmp1, %b + %addtmp = fadd double %multmp, %multmp2 + %multmp3 = fmul double %b, %b + %addtmp4 = fadd double %addtmp, %multmp3 + ret double %addtmp4 + } + + define double @bar(double %a) { + entry: + %calltmp = call double @foo(double %a, double 4.000000e+00) + %calltmp1 = call double @bar(double 3.133700e+04) + %addtmp = fadd double %calltmp, %calltmp1 + ret double %addtmp + } + + declare double @cos(double) + + define double @""() { + entry: + %calltmp = call double @cos(double 1.234000e+00) + ret double %calltmp + } + +When you quit the current demo, it dumps out the IR for the entire +module generated. Here you can see the big picture with all the +functions referencing each other. + +This wraps up the third chapter of the Kaleidoscope tutorial. Up next, +we'll describe how to `add JIT codegen and optimizer +support `_ to this so we can actually start running +code! + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +the LLVM code generator. Because this uses the LLVM libraries, we need +to link them in. To do this, we use the +`llvm-config `_ tool to inform +our makefile/command line about which options to use: + +.. code-block:: bash + + # Compile + ocamlbuild toy.byte + # Run + ./toy.byte + +Here is the code: + +\_tags: + :: + + <{lexer,parser}.ml>: use_camlp4, pp(camlp4of) + <*.{byte,native}>: g++, use_llvm, use_llvm_analysis + +myocamlbuild.ml: + .. code-block:: ocaml + + open Ocamlbuild_plugin;; + + ocaml_lib ~extern:true "llvm";; + ocaml_lib ~extern:true "llvm_analysis";; + + flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; + +token.ml: + .. 
code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + + (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) + type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + +lexer.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + + let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + + and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + + and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + + and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] + +ast.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + + (* expr - Base type for all expression nodes. *) + type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = Prototype of string * string array + + (* func - This type represents a function definition itself. *) + type func = Function of proto * expr + +parser.ml: + .. code-block:: ocaml + + (*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + + (* binop_precedence - This holds the precedence for each binary operator that is + * defined *) + let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + + (* precedence - Get the precedence of the pending binary operator token. 
*) + let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) + let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. 
*) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + + (* prototype + * ::= id '(' id* ')' *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + + (* definition ::= 'def' prototype expression *) + let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + + (* toplevelexpr ::= expression *) + let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + + (* external ::= 'extern' prototype *) + let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e + +codegen.ml: + .. 
code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + + open Llvm + + exception Error of string + + let context = global_context () + let the_module = create_module context "my cool jit" + let builder = builder context + let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 + let double_type = double_type context + + let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_add lhs_val rhs_val "addtmp" builder + | '-' -> build_sub lhs_val rhs_val "subtmp" builder + | '*' -> build_mul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + + let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. 
*) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + + let codegen_func = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + the_function + with e -> + delete_function the_function; + raise e + +toplevel.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + + open Llvm + + (* top ::= definition | external | expression | ';' *) + let rec main_loop stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. 
*) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + dump_value (Codegen.codegen_func e); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop stream + +toy.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + + open Llvm + + let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module + ;; + + main () + +`Next: Adding JIT and Optimizer Support `_ + diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html deleted file mode 100644 index d3cfd3d6736a..000000000000 --- a/docs/tutorial/OCamlLangImpl4.html +++ /dev/null @@ -1,1026 +0,0 @@ - - - - - Kaleidoscope: Adding JIT and Optimizer Support - - - - - - - - -

Kaleidoscope: Adding JIT and Optimizer Support

- - - -
-

- Written by Chris Lattner - and Erick Tryzelaar -

-
- - -

Chapter 4 Introduction

- - -
- -

Welcome to Chapter 4 of the "Implementing a language -with LLVM" tutorial. Chapters 1-3 described the implementation of a simple -language and added support for generating LLVM IR. This chapter describes -two new techniques: adding optimizer support to your language, and adding JIT -compiler support. These additions will demonstrate how to get nice, efficient code -for the Kaleidoscope language.

- -
- - -

Trivial Constant Folding

- - -
- -

Note: the default IRBuilder now always includes the constant -folding optimisations below.

- -

-Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately, -it does not produce wonderful code. For example, when compiling simple code, -we don't get obvious optimizations:

- -
-
-ready> def test(x) 1+2+x;
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 1.000000e+00, 2.000000e+00
-        %addtmp1 = fadd double %addtmp, %x
-        ret double %addtmp1
-}
-
-
- -

This code is a very, very literal transcription of the AST built by parsing -the input. As such, this transcription lacks optimizations like constant folding -(we'd like to get "add x, 3.0" in the example above) as well as other -more important optimizations. Constant folding, in particular, is a very common -and very important optimization: so much so that many language implementors -implement constant folding support in their AST representation.

- -

With LLVM, you don't need this support in the AST. Since all calls to build -LLVM IR go through the LLVM builder, it would be nice if the builder itself -checked to see if there was a constant folding opportunity when you call it. -If so, it could just do the constant fold and return the constant instead of -creating an instruction. This is exactly what the LLVMFoldingBuilder -class does. - -

All we did was switch from LLVMBuilder to -LLVMFoldingBuilder. Though we change no other code, we now have all of our -instructions implicitly constant folded without us having to do anything -about it. For example, the input above now compiles to:

- -
-
-ready> def test(x) 1+2+x;
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        ret double %addtmp
-}
-
-
- -

Well, that was easy :). In practice, we recommend always using -LLVMFoldingBuilder when generating code like this. It has no -"syntactic overhead" for its use (you don't have to uglify your compiler with -constant checks everywhere) and it can dramatically reduce the amount of -LLVM IR that is generated in some cases (particularly for languages with a macro -preprocessor or that use a lot of constants).

- -

On the other hand, the LLVMFoldingBuilder is limited by the fact -that it does all of its analysis inline with the code as it is built. If you -take a slightly more complex example:

- -
-
-ready> def test(x) (1+2+x)*(x+(1+2));
-ready> Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        %addtmp1 = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp1
-        ret double %multmp
-}
-
-
- -

In this case, the LHS and RHS of the multiplication are the same value. We'd -really like to see this generate "tmp = x+3; result = tmp*tmp;" instead -of computing "x+3" twice.

- -

Unfortunately, no amount of local analysis will be able to detect and correct -this. This requires two transformations: reassociation of expressions (to -make the add's lexically identical) and Common Subexpression Elimination (CSE) -to delete the redundant add instruction. Fortunately, LLVM provides a broad -range of optimizations that you can use, in the form of "passes".

- -
- - -

LLVM Optimization Passes

- - -
- -

LLVM provides many optimization passes, which do many different sorts of -things and have different tradeoffs. Unlike other systems, LLVM doesn't hold -to the mistaken notion that one set of optimizations is right for all languages -and for all situations. LLVM allows a compiler implementor to make complete -decisions about what optimizations to use, in which order, and in what -situation.

- -

As a concrete example, LLVM supports both "whole module" passes, which look -across as large a body of code as they can (often a whole file, but if run -at link time, this can be a substantial portion of the whole program). It also -supports and includes "per-function" passes which just operate on a single -function at a time, without looking at other functions. For more information -on passes and how they are run, see the How -to Write a Pass document and the List of LLVM -Passes.

- -

For Kaleidoscope, we are currently generating functions on the fly, one at -a time, as the user types them in. We aren't shooting for the ultimate -optimization experience in this setting, but we also want to catch the easy and -quick stuff where possible. As such, we will choose to run a few per-function -optimizations as the user types the function in. If we wanted to make a "static -Kaleidoscope compiler", we would use exactly the code we have now, except that -we would defer running the optimizer until the entire file has been parsed.

- -

In order to get per-function optimizations going, we need to set up a -Llvm.PassManager to hold and -organize the LLVM optimizations that we want to run. Once we have that, we can -add a set of optimizations to run. The code looks like this:

- -
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combining the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-
- -

The meat of the matter here is the definition of "the_fpm". It -requires a pointer to the the_module to construct itself. Once it is -set up, we use a series of "add" calls to add a bunch of LLVM passes. The -first pass is basically boilerplate: it adds a pass so that later optimizations -know how the data structures in the program are laid out. The -"the_execution_engine" variable is related to the JIT, which we will -get to in the next section.

- -

In this case, we choose to add 4 optimization passes. The passes we chose -here are a pretty standard set of "cleanup" optimizations that are useful for -a wide variety of code. I won't delve into what they do but, believe me, -they are a good starting place :).

- -

Once the Llvm.PassManager is set up, we need to make use of it. -We do this by running it after our newly created function is constructed (in -Codegen.codegen_func), but before it is returned to the client:

- -
-
-let codegen_func the_fpm = function
-      ...
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-
-
- -

As you can see, this is pretty straightforward. The the_fpm -optimizes and updates the LLVM Function* in place, improving (hopefully) its -body. With this in place, we can try our test above again:

- -
-
-ready> def test(x) (1+2+x)*(x+(1+2));
-ready> Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp
-        ret double %multmp
-}
-
-
- -

As expected, we now get our nicely optimized code, saving a floating point -add instruction from every execution of this function.

- -

LLVM provides a wide variety of optimizations that can be used in certain -circumstances. Some documentation about the various -passes is available, but it isn't very complete. Another good source of -ideas can come from looking at the passes that Clang runs to get -started. The "opt" tool allows you to experiment with passes from the -command line, so you can see if they do anything.

- -

Now that we have reasonable code coming out of our front-end, let's talk about -executing it!

- -
- - -

Adding a JIT Compiler

- - -
- -

Code that is available in LLVM IR can have a wide variety of tools -applied to it. For example, you can run optimizations on it (as we did above), -you can dump it out in textual or binary forms, you can compile the code to an -assembly file (.s) for some target, or you can JIT compile it. The nice thing -about the LLVM IR representation is that it is the "common currency" between -many different parts of the compiler. -

- -

In this section, we'll add JIT compiler support to our interpreter. The -basic idea that we want for Kaleidoscope is to have the user enter function -bodies as they do now, but immediately evaluate the top-level expressions they -type in. For example, if they type in "1 + 2;", we should evaluate and print -out 3. If they define a function, they should be able to call it from the -command line.

- -

In order to do this, we first declare and initialize the JIT. This is done -by adding a global variable and a call in main:

- -
-
-...
-let main () =
-  ...
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  ...
-
-
- -

This creates an abstract "Execution Engine" which can be either a JIT -compiler or the LLVM interpreter. LLVM will automatically pick a JIT compiler -for you if one is available for your platform, otherwise it will fall back to -the interpreter.

- -

Once the Llvm_executionengine.ExecutionEngine.t is created, the JIT -is ready to be used. There are a variety of APIs that are useful, but the -simplest one is the "Llvm_executionengine.ExecutionEngine.run_function" -function. This method JIT compiles the specified LLVM Function and returns a -function pointer to the generated machine code. In our case, this means that we -can change the code that parses a top-level expression to look like this:

- -
-
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-
-
- -

Recall that we compile top-level expressions into a self-contained LLVM -function that takes no arguments and returns the computed double. Because the -LLVM JIT compiler matches the native platform ABI, this means that you can just -cast the result pointer to a function pointer of that type and call it directly. -This means that there is no difference between JIT compiled code and native machine -code that is statically linked into your application.

- -

With just these two changes, let's see how Kaleidoscope works now!

- -
-
-ready> 4+5;
-define double @""() {
-entry:
-        ret double 9.000000e+00
-}
-
-Evaluated to 9.000000
-
-
- -

Well this looks like it is basically working. The dump of the function -shows the "no argument function that always returns double" that we synthesize -for each top level expression that is typed in. This demonstrates very basic -functionality, but can we do more?

- -
-
-ready> def testfunc(x y) x + y*2; 
-Read function definition:
-define double @testfunc(double %x, double %y) {
-entry:
-        %multmp = fmul double %y, 2.000000e+00
-        %addtmp = fadd double %multmp, %x
-        ret double %addtmp
-}
-
-ready> testfunc(4, 10);
-define double @""() {
-entry:
-        %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
-        ret double %calltmp
-}
-
-Evaluated to 24.000000
-
-
- -

This illustrates that we can now call user code, but there is something a bit -subtle going on here. Note that we only invoke the JIT on the anonymous -functions that call testfunc, but we never invoked it -on testfunc itself. What actually happened here is that the JIT -scanned for all non-JIT'd functions transitively called from the anonymous -function and compiled all of them before returning -from run_function.

- -

The JIT provides a number of other more advanced interfaces for things like -freeing allocated machine code, rejit'ing functions to update them, etc. -However, even with this simple code, we get some surprisingly powerful -capabilities - check this out (I removed the dump of the anonymous functions, -you should get the idea by now :) :

- -
-
-ready> extern sin(x);
-Read extern:
-declare double @sin(double)
-
-ready> extern cos(x);
-Read extern:
-declare double @cos(double)
-
-ready> sin(1.0);
-Evaluated to 0.841471
-
-ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
-Read function definition:
-define double @foo(double %x) {
-entry:
-        %calltmp = call double @sin(double %x)
-        %multmp = fmul double %calltmp, %calltmp
-        %calltmp2 = call double @cos(double %x)
-        %multmp4 = fmul double %calltmp2, %calltmp2
-        %addtmp = fadd double %multmp, %multmp4
-        ret double %addtmp
-}
-
-ready> foo(4.0);
-Evaluated to 1.000000
-
-
- -

Whoa, how does the JIT know about sin and cos? The answer is surprisingly -simple: in this example, the JIT started execution of a function and got to a -function call. It realized that the function was not yet JIT compiled and -invoked the standard set of routines to resolve the function. In this case, -there is no body defined for the function, so the JIT ended up calling -"dlsym("sin")" on the Kaleidoscope process itself. Since -"sin" is defined within the JIT's address space, it simply patches up -calls in the module to call the libm version of sin directly.

- -

The LLVM JIT provides a number of interfaces (look in the -llvm_executionengine.mli file) for controlling how unknown functions -get resolved. It allows you to establish explicit mappings between IR objects -and addresses (useful for LLVM global variables that you want to map to static -tables, for example), allows you to dynamically decide on the fly based on the -function name, and even allows you to have the JIT compile functions lazily the -first time they're called.

- -

One interesting application of this is that we can now extend the language -by writing arbitrary C code to implement operations. For example, if we add: -

- -
-
-/* putchard - putchar that takes a double and returns 0. */
-extern "C"
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-
- -

Now we can produce simple output to the console by using things like: -"extern putchard(x); putchard(120);", which prints a lowercase 'x' on -the console (120 is the ASCII code for 'x'). Similar code could be used to -implement file I/O, console input, and many other capabilities in -Kaleidoscope.

- -

This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At -this point, we can compile a non-Turing-complete programming language, optimize -and JIT compile it in a user-driven way. Next up we'll look into extending the language with control flow -constructs, tackling some interesting LLVM IR issues along the way.

- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with the -LLVM JIT and optimizer. To build this example, use: -

- -
-
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-
-
- -

Here is the code:

- -
-
_tags:
-
-
-<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
-<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
-<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
-<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
-
-
- -
myocamlbuild.ml:
-
-
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-
-
- -
token.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-
- -
lexer.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [< ' ('0' .. '9' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [< ' ('#'); stream >] ->
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [< 'c; stream >] ->
-      [< 'Token.Kwd c; lex stream >]
-
-  (* end of stream. *)
-  | [< >] -> [< >]
-
-and lex_number buffer = parser
-  | [< ' ('0' .. '9' | '.' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [< stream=lex >] ->
-      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
-
-and lex_ident buffer = parser
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [< stream=lex >] ->
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-and lex_comment = parser
-  | [< ' ('\n'); stream=lex >] -> stream
-  | [< 'c; e=lex_comment >] -> e
-  | [< >] -> [< >]
-
-
- -
ast.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-
-
- -
parser.ml:
-
-
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [< 'Token.Number n >] -> Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [< 'Token.Ident id; stream >] ->
-      let rec parse_args accumulator = parser
-        | [< e=parse_expr; stream >] ->
-            begin parser
-              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
-              | [< >] -> e :: accumulator
-            end stream
-        | [< >] -> accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [< 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'">] ->
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [< >] -> Ast.Variable id
-      in
-      parse_ident id stream
-
-  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec < expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) ->
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec < next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -> rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -> lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [< e=parse_expr >] ->
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [< 'Token.Extern; e=parse_prototype >] -> e
-
-
- -
codegen.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
-  | Ast.Number n -> const_float double_type n
-  | Ast.Variable name ->
-      (try Hashtbl.find named_values name with
-        | Not_found -> raise (Error "unknown variable name"))
-  | Ast.Binary (op, lhs, rhs) ->
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
-        | '<' ->
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -> raise (Error "invalid binary operator")
-      end
-  | Ast.Call (callee, args) ->
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) ->
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -> declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f ->
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f <> At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) <> ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a ->
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) ->
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e ->
-        delete_function the_function;
-        raise e
-
-
- -
toplevel.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -> ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') ->
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token ->
-      begin
-        try match token with
-        | Token.Def ->
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern ->
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ ->
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s ->
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready> "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-
-
- -
toy.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready> "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-
-
- -
bindings.c
-
-
-#include <stdio.h>
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-
-
- -Next: Extending the language: control flow -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- Erick Tryzelaar
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/OCamlLangImpl4.rst b/docs/tutorial/OCamlLangImpl4.rst new file mode 100644 index 000000000000..b13b2afa8883 --- /dev/null +++ b/docs/tutorial/OCamlLangImpl4.rst @@ -0,0 +1,915 @@ +============================================== +Kaleidoscope: Adding JIT and Optimizer Support +============================================== + +.. contents:: + :local: + +Chapter 4 Introduction +====================== + +Welcome to Chapter 4 of the "`Implementing a language with +LLVM `_" tutorial. Chapters 1-3 described the implementation +of a simple language and added support for generating LLVM IR. This +chapter describes two new techniques: adding optimizer support to your +language, and adding JIT compiler support. These additions will +demonstrate how to get nice, efficient code for the Kaleidoscope +language. + +Trivial Constant Folding +======================== + +**Note:** the default ``IRBuilder`` now always includes the constant +folding optimisations below. + +Our demonstration for Chapter 3 is elegant and easy to extend. +Unfortunately, it does not produce wonderful code. For example, when +compiling simple code, we don't get obvious optimizations: + +:: + + ready> def test(x) 1+2+x; + Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double 1.000000e+00, 2.000000e+00 + %addtmp1 = fadd double %addtmp, %x + ret double %addtmp1 + } + +This code is a very, very literal transcription of the AST built by +parsing the input. As such, this transcription lacks optimizations like +constant folding (we'd like to get "``add x, 3.0``" in the example +above) as well as other more important optimizations. Constant folding, +in particular, is a very common and very important optimization: so much +so that many language implementors implement constant folding support in +their AST representation. + +With LLVM, you don't need this support in the AST. 
Since all calls to +build LLVM IR go through the LLVM builder, it would be nice if the +builder itself checked to see if there was a constant folding +opportunity when you call it. If so, it could just do the constant fold +and return the constant instead of creating an instruction. This is +exactly what the ``LLVMFoldingBuilder`` class does. + +All we did was switch from ``LLVMBuilder`` to ``LLVMFoldingBuilder``. +Though we change no other code, we now have all of our instructions +implicitly constant folded without us having to do anything about it. +For example, the input above now compiles to: + +:: + + ready> def test(x) 1+2+x; + Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double 3.000000e+00, %x + ret double %addtmp + } + +Well, that was easy :). In practice, we recommend always using +``LLVMFoldingBuilder`` when generating code like this. It has no +"syntactic overhead" for its use (you don't have to uglify your compiler +with constant checks everywhere) and it can dramatically reduce the +amount of LLVM IR that is generated in some cases (particularly for +languages with a macro preprocessor or that use a lot of constants). + +On the other hand, the ``LLVMFoldingBuilder`` is limited by the fact +that it does all of its analysis inline with the code as it is built. If +you take a slightly more complex example: + +:: + + ready> def test(x) (1+2+x)*(x+(1+2)); + ready> Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double 3.000000e+00, %x + %addtmp1 = fadd double %x, 3.000000e+00 + %multmp = fmul double %addtmp, %addtmp1 + ret double %multmp + } + +In this case, the LHS and RHS of the multiplication are the same value. +We'd really like to see this generate "``tmp = x+3; result = tmp*tmp;``" +instead of computing "``x+3``" twice. + +Unfortunately, no amount of local analysis will be able to detect and +correct this.
This requires two transformations: reassociation of +expressions (to make the add's lexically identical) and Common +Subexpression Elimination (CSE) to delete the redundant add instruction. +Fortunately, LLVM provides a broad range of optimizations that you can +use, in the form of "passes". + +LLVM Optimization Passes +======================== + +LLVM provides many optimization passes, which do many different sorts of +things and have different tradeoffs. Unlike other systems, LLVM doesn't +hold to the mistaken notion that one set of optimizations is right for +all languages and for all situations. LLVM allows a compiler implementor +to make complete decisions about what optimizations to use, in which +order, and in what situation. + +As a concrete example, LLVM supports both "whole module" passes, which +look across as large of body of code as they can (often a whole file, +but if run at link time, this can be a substantial portion of the whole +program). It also supports and includes "per-function" passes which just +operate on a single function at a time, without looking at other +functions. For more information on passes and how they are run, see the +`How to Write a Pass <../WritingAnLLVMPass.html>`_ document and the +`List of LLVM Passes <../Passes.html>`_. + +For Kaleidoscope, we are currently generating functions on the fly, one +at a time, as the user types them in. We aren't shooting for the +ultimate optimization experience in this setting, but we also want to +catch the easy and quick stuff where possible. As such, we will choose +to run a few per-function optimizations as the user types the function +in. If we wanted to make a "static Kaleidoscope compiler", we would use +exactly the code we have now, except that we would defer running the +optimizer until the entire file has been parsed. 
+ +In order to get per-function optimizations going, we need to set up a +`Llvm.PassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold and +organize the LLVM optimizations that we want to run. Once we have that, +we can add a set of optimizations to run. The code looks like this: + +.. code-block:: ocaml + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + +The meat of the matter here is the definition of "``the_fpm``". It +requires a pointer to the ``the_module`` to construct itself. Once it is +set up, we use a series of "add" calls to add a bunch of LLVM passes. +The first pass is basically boilerplate, it adds a pass so that later +optimizations know how the data structures in the program are laid out. +The "``the_execution_engine``" variable is related to the JIT, which we +will get to in the next section. + +In this case, we choose to add 4 optimization passes. The passes we +chose here are a pretty standard set of "cleanup" optimizations that are +useful for a wide variety of code. I won't delve into what they do but, +believe me, they are a good starting place :). + +Once the ``Llvm.PassManager`` is set up, we need to make use of it.
We +do this by running it after our newly created function is constructed +(in ``Codegen.codegen_func``), but before it is returned to the client: + +.. code-block:: ocaml + + let codegen_func the_fpm = function + ... + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + +As you can see, this is pretty straightforward. The ``the_fpm`` +optimizes and updates the LLVM Function\* in place, improving +(hopefully) its body. With this in place, we can try our test above +again: + +:: + + ready> def test(x) (1+2+x)*(x+(1+2)); + ready> Read function definition: + define double @test(double %x) { + entry: + %addtmp = fadd double %x, 3.000000e+00 + %multmp = fmul double %addtmp, %addtmp + ret double %multmp + } + +As expected, we now get our nicely optimized code, saving a floating +point add instruction from every execution of this function. + +LLVM provides a wide variety of optimizations that can be used in +certain circumstances. Some `documentation about the various +passes <../Passes.html>`_ is available, but it isn't very complete. +Another good source of ideas can come from looking at the passes that +``Clang`` runs to get started. The "``opt``" tool allows you to +experiment with passes from the command line, so you can see if they do +anything. + +Now that we have reasonable code coming out of our front-end, let's talk +about executing it! + +Adding a JIT Compiler +===================== + +Code that is available in LLVM IR can have a wide variety of tools +applied to it. For example, you can run optimizations on it (as we did +above), you can dump it out in textual or binary forms, you can compile +the code to an assembly file (.s) for some target, or you can JIT +compile it.
The nice thing about the LLVM IR representation is that it +is the "common currency" between many different parts of the compiler. + +In this section, we'll add JIT compiler support to our interpreter. The +basic idea that we want for Kaleidoscope is to have the user enter +function bodies as they do now, but immediately evaluate the top-level +expressions they type in. For example, if they type in "1 + 2;", we +should evaluate and print out 3. If they define a function, they should +be able to call it from the command line. + +In order to do this, we first declare and initialize the JIT. This is +done by adding a global variable and a call in ``main``: + +.. code-block:: ocaml + + ... + let main () = + ... + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + ... + +This creates an abstract "Execution Engine" which can be either a JIT +compiler or the LLVM interpreter. LLVM will automatically pick a JIT +compiler for you if one is available for your platform, otherwise it +will fall back to the interpreter. + +Once the ``Llvm_executionengine.ExecutionEngine.t`` is created, the JIT +is ready to be used. There are a variety of APIs that are useful, but +the simplest one is the +"``Llvm_executionengine.ExecutionEngine.run_function``" function. This +method JIT compiles the specified LLVM Function and returns a function +pointer to the generated machine code. In our case, this means that we +can change the code that parses a top-level expression to look like +this: + +.. code-block:: ocaml + + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. 
*) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + +Recall that we compile top-level expressions into a self-contained LLVM +function that takes no arguments and returns the computed double. +Because the LLVM JIT compiler matches the native platform ABI, this +means that you can just cast the result pointer to a function pointer of +that type and call it directly. This means there is no difference +between JIT compiled code and native machine code that is statically +linked into your application. + +With just these two changes, let's see how Kaleidoscope works now! + +:: + + ready> 4+5; + define double @""() { + entry: + ret double 9.000000e+00 + } + + Evaluated to 9.000000 + +Well this looks like it is basically working. The dump of the function +shows the "no argument function that always returns double" that we +synthesize for each top level expression that is typed in. This +demonstrates very basic functionality, but can we do more? + +:: + + ready> def testfunc(x y) x + y*2; + Read function definition: + define double @testfunc(double %x, double %y) { + entry: + %multmp = fmul double %y, 2.000000e+00 + %addtmp = fadd double %multmp, %x + ret double %addtmp + } + + ready> testfunc(4, 10); + define double @""() { + entry: + %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01) + ret double %calltmp + } + + Evaluated to 24.000000 + +This illustrates that we can now call user code, but there is something +a bit subtle going on here. Note that we only invoke the JIT on the +anonymous functions that *call testfunc*, but we never invoked it on +*testfunc* itself. What actually happened here is that the JIT scanned +for all non-JIT'd functions transitively called from the anonymous +function and compiled all of them before returning from +``run_function``.
+ +The JIT provides a number of other more advanced interfaces for things +like freeing allocated machine code, rejit'ing functions to update them, +etc. However, even with this simple code, we get some surprisingly +powerful capabilities - check this out (I removed the dump of the +anonymous functions, you should get the idea by now :) : + +:: + + ready> extern sin(x); + Read extern: + declare double @sin(double) + + ready> extern cos(x); + Read extern: + declare double @cos(double) + + ready> sin(1.0); + Evaluated to 0.841471 + + ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x); + Read function definition: + define double @foo(double %x) { + entry: + %calltmp = call double @sin(double %x) + %multmp = fmul double %calltmp, %calltmp + %calltmp2 = call double @cos(double %x) + %multmp4 = fmul double %calltmp2, %calltmp2 + %addtmp = fadd double %multmp, %multmp4 + ret double %addtmp + } + + ready> foo(4.0); + Evaluated to 1.000000 + +Whoa, how does the JIT know about sin and cos? The answer is +surprisingly simple: in this example, the JIT started execution of a +function and got to a function call. It realized that the function was +not yet JIT compiled and invoked the standard set of routines to resolve +the function. In this case, there is no body defined for the function, +so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope +process itself. Since "``sin``" is defined within the JIT's address +space, it simply patches up calls in the module to call the libm version +of ``sin`` directly. + +The LLVM JIT provides a number of interfaces (look in the +``llvm_executionengine.mli`` file) for controlling how unknown functions +get resolved. 
It allows you to establish explicit mappings between IR +objects and addresses (useful for LLVM global variables that you want to +map to static tables, for example), allows you to dynamically decide on +the fly based on the function name, and even allows you to have the JIT +compile functions lazily the first time they're called. + +One interesting application of this is that we can now extend the +language by writing arbitrary C code to implement operations. For +example, if we add: + +.. code-block:: c++ + + /* putchard - putchar that takes a double and returns 0. */ + extern "C" + double putchard(double X) { + putchar((char)X); + return 0; + } + +Now we can produce simple output to the console by using things like: +"``extern putchard(x); putchard(120);``", which prints a lowercase 'x' +on the console (120 is the ASCII code for 'x'). Similar code could be +used to implement file I/O, console input, and many other capabilities +in Kaleidoscope. + +This completes the JIT and optimizer chapter of the Kaleidoscope +tutorial. At this point, we can compile a non-Turing-complete +programming language, optimize and JIT compile it in a user-driven way. +Next up we'll look into `extending the language with control flow +constructs <OCamlLangImpl5.html>`_, tackling some interesting LLVM IR +issues along the way. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +the LLVM JIT and optimizer. To build this example, use: + +.. code-block:: bash + + # Compile + ocamlbuild toy.byte + # Run + ./toy.byte + +Here is the code: + +\_tags: + :: + + <{lexer,parser}.ml>: use_camlp4, pp(camlp4of) + <*.{byte,native}>: g++, use_llvm, use_llvm_analysis + <*.{byte,native}>: use_llvm_executionengine, use_llvm_target + <*.{byte,native}>: use_llvm_scalar_opts, use_bindings + +myocamlbuild.ml: + ..
code-block:: ocaml + + open Ocamlbuild_plugin;; + + ocaml_lib ~extern:true "llvm";; + ocaml_lib ~extern:true "llvm_analysis";; + ocaml_lib ~extern:true "llvm_executionengine";; + ocaml_lib ~extern:true "llvm_target";; + ocaml_lib ~extern:true "llvm_scalar_opts";; + + flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; + dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; + +token.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + + (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) + type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + +lexer.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + + let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + + and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' 
as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + + and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + + and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] + +ast.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + + (* expr - Base type for all expression nodes. *) + type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = Prototype of string * string array + + (* func - This type represents a function definition itself. *) + type func = Function of proto * expr + +parser.ml: + .. 
code-block:: ocaml + + (*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + + (* binop_precedence - This holds the precedence for each binary operator that is + * defined *) + let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + + (* precedence - Get the precedence of the pending binary operator token. *) + let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) + let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. 
*) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + + (* prototype + * ::= id '(' id* ')' *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + + (* definition ::= 'def' prototype expression *) + let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + + (* toplevelexpr ::= expression *) + let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + + (* external ::= 'extern' prototype *) + let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e + +codegen.ml: + .. 
code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + + open Llvm + + exception Error of string + + let context = global_context () + let the_module = create_module context "my cool jit" + let builder = builder context + let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 + let double_type = double_type context + + let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_add lhs_val rhs_val "addtmp" builder + | '-' -> build_sub lhs_val rhs_val "subtmp" builder + | '*' -> build_mul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + + let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. 
*) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + + let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e + +toplevel.ml: + .. 
code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + + (* top ::= definition | external | expression | ';' *) + let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream + +toy.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Main driver code. 
+ *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + open Llvm_target + open Llvm_scalar_opts + + let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module + ;; + + main () + +bindings.c + .. code-block:: c + + #include + + /* putchard - putchar that takes a double and returns 0. 
*/ + extern double putchard(double X) { + putchar((char)X); + return 0; + } + +`Next: Extending the language: control flow `_ + diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html deleted file mode 100644 index 0a759ac66d67..000000000000 --- a/docs/tutorial/OCamlLangImpl5.html +++ /dev/null @@ -1,1560 +0,0 @@ - - - - - Kaleidoscope: Extending the Language: Control Flow - - - - - - - - -

Kaleidoscope: Extending the Language: Control Flow

- - - -
-

- Written by Chris Lattner - and Erick Tryzelaar -

-
- - -

Chapter 5 Introduction

- - -
- -

Welcome to Chapter 5 of the "Implementing a language -with LLVM" tutorial. Parts 1-4 described the implementation of the simple -Kaleidoscope language and included support for generating LLVM IR, followed by -optimizations and a JIT compiler. Unfortunately, as presented, Kaleidoscope is -mostly useless: it has no control flow other than call and return. This means -that you can't have conditional branches in the code, significantly limiting its -power. In this episode of "build that compiler", we'll extend Kaleidoscope to -have an if/then/else expression plus a simple 'for' loop.

- -
- - -

If/Then/Else

- - -
- -

-Extending Kaleidoscope to support if/then/else is quite straightforward. It -basically requires adding support for this "new" concept to the lexer, -parser, AST, and LLVM code emitter. This example is nice, because it shows how -easy it is to "grow" a language over time, incrementally extending it as new -ideas are discovered.

- -

Before we get going on "how" we add this extension, let's talk about "what" we -want. The basic idea is that we want to be able to write this sort of thing: -

- -
-
-def fib(x)
-  if x < 3 then
-    1
-  else
-    fib(x-1)+fib(x-2);
-
-
- -

In Kaleidoscope, every construct is an expression: there are no statements. -As such, the if/then/else expression needs to return a value like any other. -Since we're using a mostly functional form, we'll have it evaluate its -conditional, then return the 'then' or 'else' value based on how the condition -was resolved. This is very similar to the C "?:" expression.

- -

The semantics of the if/then/else expression is that it evaluates the -condition to a boolean equality value: 0.0 is considered to be false and -everything else is considered to be true. -If the condition is true, the first subexpression is evaluated and returned; if -the condition is false, the second subexpression is evaluated and returned. -Since Kaleidoscope allows side-effects, this behavior is important to nail down. -

- -

Now that we know what we "want", let's break this down into its constituent -pieces.

- - -

Lexer Extensions for If/Then/Else

- - - -
- -

The lexer extensions are straightforward. First we add new variants -for the relevant tokens:

- -
-
-  (* control *)
-  | If | Then | Else | For | In
-
-
- -

Once we have that, we recognize the new keywords in the lexer. This is pretty simple -stuff:

- -
-
-      ...
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | "if" -> [< 'Token.If; stream >]
-      | "then" -> [< 'Token.Then; stream >]
-      | "else" -> [< 'Token.Else; stream >]
-      | "for" -> [< 'Token.For; stream >]
-      | "in" -> [< 'Token.In; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-
- -
- - -

AST Extensions for If/Then/Else

- - -
- -

To represent the new expression we add a new AST variant for it:

- -
-
-type expr =
-  ...
-  (* variant for if/then/else. *)
-  | If of expr * expr * expr
-
-
- -

The AST variant just has pointers to the various subexpressions.

- -
- - -

Parser Extensions for If/Then/Else

- - -
- -

Now that we have the relevant tokens coming from the lexer and we have the -AST node to build, our parsing logic is relatively straightforward. First we -define a new parsing function:

- -
-
-let rec parse_primary = parser
-  ...
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [< 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
-      Ast.If (c, t, e)
-
-
- -

Next we hook it up as a primary expression. Because the new case is added directly to parse_primary, the code is the same as shown above:

- -
-
-let rec parse_primary = parser
-  ...
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [< 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
-      Ast.If (c, t, e)
-
-
- -
- - -

LLVM IR for If/Then/Else

- - -
- -

Now that we have it parsing and building the AST, the final piece is adding -LLVM code generation support. This is the most interesting part of the -if/then/else example, because this is where it starts to introduce new concepts. -All of the code above has been thoroughly described in previous chapters. -

- -

To motivate the code we want to produce, let's take a look at a simple -example. Consider:

- -
-
-extern foo();
-extern bar();
-def baz(x) if x then foo() else bar();
-
-
- -

If you disable optimizations, the code you'll (soon) get from Kaleidoscope -looks like this:

- -
-
-declare double @foo()
-
-declare double @bar()
-
-define double @baz(double %x) {
-entry:
-  %ifcond = fcmp one double %x, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:    ; preds = %entry
-  %calltmp = call double @foo()
-  br label %ifcont
-
-else:    ; preds = %entry
-  %calltmp1 = call double @bar()
-  br label %ifcont
-
-ifcont:    ; preds = %else, %then
-  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
-  ret double %iftmp
-}
-
-
- -

To visualize the control flow graph, you can use a nifty feature of the LLVM -'opt' tool. If you put this LLVM IR -into "t.ll" and run "llvm-as < t.ll | opt -analyze -view-cfg", a window will pop up and you'll -see this graph:

- -
Example CFG
- -

Another way to get this is to call "Llvm_analysis.view_function_cfg -f" or "Llvm_analysis.view_function_cfg_only f" (where f -is a "Function") either by inserting actual calls into the code and -recompiling or by calling these in the debugger. LLVM has many nice features -for visualizing various graphs.

- -

Getting back to the generated code, it is fairly simple: the entry block -evaluates the conditional expression ("x" in our case here) and compares the -result to 0.0 with the "fcmp one" -instruction ('one' is "Ordered and Not Equal"). Based on the result of this -expression, the code jumps to either the "then" or "else" blocks, which contain -the expressions for the true/false cases.

- -

Once the then/else blocks are finished executing, they both branch back to the -'ifcont' block to execute the code that happens after the if/then/else. In this -case the only thing left to do is to return to the caller of the function. The -question then becomes: how does the code know which expression to return?

- -

The answer to this question involves an important SSA operation: the -Phi -operation. If you're not familiar with SSA, the wikipedia -article is a good introduction and there are various other introductions to -it available on your favorite search engine. The short version is that -"execution" of the Phi operation requires "remembering" which block control came -from. The Phi operation takes on the value corresponding to the input control -block. In this case, if control comes in from the "then" block, it gets the -value of "calltmp". If control comes from the "else" block, it gets the value -of "calltmp1".

- -

At this point, you are probably starting to think "Oh no! This means my -simple and elegant front-end will have to start generating SSA form in order to -use LLVM!". Fortunately, this is not the case, and we strongly advise -not implementing an SSA construction algorithm in your front-end -unless there is an amazingly good reason to do so. In practice, there are two -sorts of values that float around in code written for your average imperative -programming language that might need Phi nodes:

- -
    -
  1. Code that involves user variables: x = 1; x = x + 1;
  2. -
  3. Values that are implicit in the structure of your AST, such as the Phi node -in this case.
  4. -
- -

In Chapter 7 of this tutorial ("mutable -variables"), we'll talk about #1 -in depth. For now, just believe me that you don't need SSA construction to -handle this case. For #2, you have the choice of using the techniques that we will -describe for #1, or you can insert Phi nodes directly, if convenient. In this -case, it is really really easy to generate the Phi node, so we choose to do it -directly.

- -

Okay, enough of the motivation and overview, let's generate code!

- -
- - -

Code Generation for If/Then/Else

- - -
- -

In order to generate code for this, we add a case for Ast.If to the -codegen_expr function:

- -
-
-let rec codegen_expr = function
-  ...
-  | Ast.If (cond, then_, else_) ->
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing not-equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
-
- -

This code is straightforward and similar to what we saw before. We emit the -expression for the condition, then compare that value to zero to get a truth -value as a 1-bit (bool) value.

- -
-
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-      position_at_end then_bb builder;
-
-
- -

-As opposed to the C++ tutorial, we have to build -our basic blocks bottom up since we can't have dangling BasicBlocks. We start -off by saving a pointer to the first block (which might not be the entry -block), which we'll need to build a conditional branch later. We do this by -asking the builder for the current BasicBlock. The fourth line -gets the current Function object that is being built. It gets this by asking the -start_bb for its "parent" (the function it is currently embedded -into).

- -

Once it has that, it creates one block. It is automatically appended into -the function's list of blocks.

- -
-
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-
-
- -

We move the builder to start inserting into the "then" block. Strictly -speaking, this call moves the insertion point to be at the end of the specified -block. However, since the "then" block is empty, it also starts out by -inserting at the beginning of the block. :)

- -

Once the insertion point is set, we recursively codegen the "then" expression -from the AST.

- -

The final line here is quite subtle, but is very important. The basic issue -is that when we create the Phi node in the merge block, we need to set up the -block/value pairs that indicate how the Phi will work. Importantly, the Phi -node expects to have an entry for each predecessor of the block in the CFG. Why -then, are we getting the current block when we just set it to ThenBB 5 lines -above? The problem is that the "Then" expression may actually itself change the -block that the Builder is emitting into if, for example, it contains a nested -"if/then/else" expression. Because calling Codegen recursively could -arbitrarily change the notion of the current block, we are required to get an -up-to-date value for code that will set up the Phi node.

- -
-
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-
-
- -

Code generation for the 'else' block is basically identical to codegen for -the 'then' block.

- -
-
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-
-
- -

The first two lines here are now familiar: the first adds the "merge" block -to the Function object. The second line changes the insertion point so that -newly created code will go into the "merge" block. Once that is done, we need -to create the PHI node and set up the block/value pairs for the PHI.

- -
-
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-
-
- -

Once the blocks are created, we can emit the conditional branch that chooses -between them. Note that creating new blocks does not implicitly affect the -IRBuilder, so it is still inserting into the block that the condition -went into. This is why we needed to save the "start" block.

- -
-
-      (* Set an unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-
-
- -

To finish off the blocks, we create an unconditional branch -to the merge block. One interesting (and very important) aspect of the LLVM IR -is that it requires all basic blocks -to be "terminated" with a control flow -instruction such as return or branch. This means that all control flow, -including fall throughs must be made explicit in the LLVM IR. If you -violate this rule, the verifier will emit an error. - -

Finally, the CodeGen function returns the phi node as the value computed by -the if/then/else expression. In our example above, this returned value will -feed into the code for the top-level function, which will create the return -instruction.

- -

Overall, we now have the ability to execute conditional code in -Kaleidoscope. With this extension, Kaleidoscope is a fairly complete language -that can calculate a wide variety of numeric functions. Next up we'll add -another useful expression that is familiar from non-functional languages...

- -
- -
- - -

'for' Loop Expression

- - -
- -

Now that we know how to add basic control flow constructs to the language, -we have the tools to add more powerful things. Let's add something more -aggressive, a 'for' expression:

- -
-
- extern putchard(char);
- def printstar(n)
-   for i = 1, i < n, 1.0 in
-     putchard(42);  # ascii 42 = '*'
-
- # print 100 '*' characters
- printstar(100);
-
-
- -

This expression defines a new variable ("i" in this case) which iterates from -a starting value, while the condition ("i < n" in this case) is true, -incrementing by an optional step value ("1.0" in this case). If the step value -is omitted, it defaults to 1.0. While the condition is true, it executes its -body expression. Because we don't have anything better to return, we'll just -define the loop as always returning 0.0. In the future when we have mutable -variables, it will get more useful.

- -

As before, let's talk about the changes that we need to make to Kaleidoscope to -support this.

- - -

Lexer Extensions for the 'for' Loop

- - -
- -

The lexer extensions are the same sort of thing as for if/then/else:

- -
-
-  ... in Token.token ...
-  (* control *)
-  | If | Then | Else
-  | For | In
-
-  ... in Lexer.lex_ident...
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | "if" -> [< 'Token.If; stream >]
-      | "then" -> [< 'Token.Then; stream >]
-      | "else" -> [< 'Token.Else; stream >]
-      | "for" -> [< 'Token.For; stream >]
-      | "in" -> [< 'Token.In; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-
- -
- - -

AST Extensions for the 'for' Loop

- - -
- -

The AST variant is just as simple. It basically boils down to capturing -the variable name and the constituent expressions in the node.

- -
-
-type expr =
-  ...
-  (* variant for for/in. *)
-  | For of string * expr * expr * expr option * expr
-
-
- -
- - -

Parser Extensions for the 'for' Loop

- - -
- -

The parser code is also fairly standard. The only interesting thing here is -handling of the optional step value. The parser code handles it by checking to -see if the second comma is present. If not, it sets the step value to None in -the AST node:

- -
-
-let rec parse_primary = parser
-  ...
-  (* forexpr
-        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [< 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream >] ->
-      begin parser
-        | [<
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream >] ->
-            let step =
-              begin parser
-              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
-              | [< >] -> None
-              end stream
-            in
-            begin parser
-            | [< 'Token.In; body=parse_expr >] ->
-                Ast.For (id, start, end_, step, body)
-            | [< >] ->
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [< >] ->
-            raise (Stream.Error "expected '=' after for")
-      end stream
-
-
- -
- - -

LLVM IR for the 'for' Loop

- - -
- -

Now we get to the good part: the LLVM IR we want to generate for this thing. -With the simple example above, we get this LLVM IR (note that this dump is -generated with optimizations disabled for clarity): -

- -
-
-declare double @putchard(double)
-
-define double @printstar(double %n) {
-entry:
-        ; initial value = 1.0 (inlined into phi)
-  br label %loop
-
-loop:    ; preds = %loop, %entry
-  %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
-        ; body
-  %calltmp = call double @putchard(double 4.200000e+01)
-        ; increment
-  %nextvar = fadd double %i, 1.000000e+00
-
-        ; termination test
-  %cmptmp = fcmp ult double %i, %n
-  %booltmp = uitofp i1 %cmptmp to double
-  %loopcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %loopcond, label %loop, label %afterloop
-
-afterloop:    ; preds = %loop
-        ; loop always returns 0.0
-  ret double 0.000000e+00
-}
-
-
- -

This loop contains all the same constructs we saw before: a phi node, several -expressions, and some basic blocks. Let's see how this fits together.

- -
- - -

Code Generation for the 'for' Loop

- - -
- -

The first part of Codegen is very simple: we just output the start expression -for the loop value:

- -
-
-let rec codegen_expr = function
-  ...
-  | Ast.For (var_name, start, end_, step, body) ->
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-
- -

With this out of the way, the next step is to set up the LLVM basic block -for the start of the loop body. In the case above, the whole loop body is one -block, but remember that the body code itself could consist of multiple blocks -(e.g. if it contains an if/then/else or a for/in expression).

- -
-
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let preheader_bb = insertion_block builder in
-      let the_function = block_parent preheader_bb in
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-
-
- -

This code is similar to what we saw for if/then/else. Because we will need -it to create the Phi node, we remember the block that falls through into the -loop. Once we have that, we create the actual block that starts the loop and -create an unconditional branch for the fall-through between the two blocks.

- -
-
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Start the PHI node with an entry for start. *)
-      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-
-
- -

Now that the "preheader" for the loop is set up, we switch to emitting code -for the loop body. To begin with, we move the insertion point and create the -PHI node for the loop induction variable. Since we already know the incoming -value for the starting value, we add it to the Phi node. Note that the Phi will -eventually get a second value for the backedge, but we can't set it up yet -(because it doesn't exist!).

- -
-
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -> None
-      in
-      Hashtbl.add named_values var_name variable;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-
-
- -

Now the code starts to get more interesting. Our 'for' loop introduces a new -variable to the symbol table. This means that our symbol table can now contain -either function arguments or loop variables. To handle this, before we codegen -the body of the loop, we add the loop variable as the current value for its -name. Note that it is possible that there is a variable of the same name in the -outer scope. It would be easy to make this an error (emit an error and return -null if there is already an entry for VarName) but we choose to allow shadowing -of variables. In order to handle this correctly, we remember the Value that -we are potentially shadowing in old_val (which will be None if there is -no shadowed variable).

- -

Once the loop variable is set into the symbol table, the code recursively -codegen's the body. This allows the body to use the loop variable: any -references to it will naturally find it in the symbol table.

- -
-
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -> codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -> const_float double_type 1.0
-      in
-
-      let next_var = build_add variable step_val "nextvar" builder in
-
-
- -

Now that the body is emitted, we compute the next value of the iteration -variable by adding the step value, or 1.0 if it isn't present. -'next_var' will be the value of the loop variable on the next iteration -of the loop.

- -
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Convert condition to a bool by comparing not-equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
-
- -

Finally, we evaluate the exit value of the loop, to determine whether the -loop should exit. This mirrors the condition evaluation for the if/then/else -statement.

- -
-
-      (* Create the "after loop" block and insert it. *)
-      let loop_end_bb = insertion_block builder in
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-
-
- -

With the code for the body of the loop complete, we just need to finish up -the control flow for it. This code remembers the end block (for the phi node), then creates the block for the loop exit ("afterloop"). Based on the value of the -exit condition, it creates a conditional branch that chooses between executing -the loop again and exiting the loop. Any future code is emitted in the -"afterloop" block, so it sets the insertion position to it.

- -
-
-      (* Add a new entry to the PHI node for the backedge. *)
-      add_incoming (next_var, loop_end_bb) variable;
-
-      (* Restore the unshadowed variable. *)
-      begin match old_val with
-      | Some old_val -> Hashtbl.add named_values var_name old_val
-      | None -> ()
-      end;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-
-
- -

The final code handles various cleanups: now that we have the -"next_var" value, we can add the incoming value to the loop PHI node. -After that, we remove the loop variable from the symbol table, so that it isn't -in scope after the for loop. Finally, code generation of the for loop always -returns 0.0, so that is what we return from Codegen.codegen_expr.

- -

With this, we conclude the "adding control flow to Kaleidoscope" chapter of -the tutorial. In this chapter we added two control flow constructs, and used -them to motivate a couple of aspects of the LLVM IR that are important for -front-end implementors to know. In the next chapter of our saga, we will get -a bit crazier and add user-defined operators -to our poor innocent language.

- -
- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with the -if/then/else and for expressions. To build this example, use: -

- -
-
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-
-
- -

Here is the code:

- -
-
_tags:
-
-
-<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
-<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
-<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
-<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
-
-
- -
myocamlbuild.ml:
-
-
-open Ocamlbuild_plugin;;
-(* Register each LLVM binding library used by the tutorial as an external library. *)
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-(* Targets tagged g++ link with -cc g++; targets tagged use_bindings depend on bindings.o. *)
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-
-
- -
token.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* token - Every lexeme the lexer can produce.  A character the lexer does not
- * otherwise recognize is passed through unchanged as a Kwd token. *)
-type token =
-  (* commands: the "def" and "extern" keywords *)
-  | Def | Extern
-
-  (* primary: identifiers and floating-point literals *)
-  | Ident of string | Number of float
-
-  (* unknown: any other single character, such as an operator or parenthesis *)
-  | Kwd of char
-
-  (* control: the "if", "then", "else", "for" and "in" keywords *)
-  | If | Then | Else
-  | For | In
-
-
- -
lexer.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer: turns a stream of characters into a stream of Token.token values.
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9][0-9.]* *)
-  | [< ' ('0' .. '9' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [< ' ('#'); stream >] ->
-      lex_comment stream
-
-  (* Otherwise, pass the character through unchanged as a Kwd token. *)
-  | [< 'c; stream >] ->
-      [< 'Token.Kwd c; lex stream >]
-
-  (* end of stream. *)
-  | [< >] -> [< >]
-(* lex_number - collect the remaining digits and '.' characters of a number. *)
-and lex_number buffer = parser
-  | [< ' ('0' .. '9' | '.' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [< stream=lex >] ->  (* NOTE(review): text with several dots reaches float_of_string unchecked *)
-      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
-(* lex_ident - collect the rest of an identifier, then map keywords to their tokens. *)
-and lex_ident buffer = parser
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [< stream=lex >] ->
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | "if" -> [< 'Token.If; stream >]
-      | "then" -> [< 'Token.Then; stream >]
-      | "else" -> [< 'Token.Else; stream >]
-      | "for" -> [< 'Token.For; stream >]
-      | "in" -> [< 'Token.In; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-(* lex_comment - discard characters up to and including the next newline, then resume lexing. *)
-and lex_comment = parser
-  | [< ' ('\n'); stream=lex >] -> stream
-  | [< 'c; e=lex_comment >] -> e
-  | [< >] -> [< >]
-
-
- -
ast.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator: (operator character, left operand, right operand). *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls: (callee name, argument expressions). *)
-  | Call of string * expr array
-
-  (* variant for if/then/else: (condition, then-value, else-value). *)
-  | If of expr * expr * expr
-
-  (* variant for for/in: (loop variable, start, end condition, optional step, body). *)
-  | For of string * expr * expr * expr option * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself: its prototype and body. *)
-type func = Function of proto * expr
-
-
- -
parser.ml:
-
-
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - Maps each defined binary operator character to its
- * precedence; larger values bind more tightly.  The driver fills it in. *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Precedence of operator character c, or -1 if c is not a defined binary operator. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
-
-(* primary
- *   ::= identifierexpr
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [< 'Token.Number n >] -> Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [< 'Token.Ident id; stream >] ->
-      let rec parse_args accumulator = parser  (* collects arguments in reverse order *)
-        | [< e=parse_expr; stream >] ->
-            begin parser
-              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
-              | [< >] -> e :: accumulator
-            end stream
-        | [< >] -> accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call: the reversed argument list is put back in source order. *)
-        | [< 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'">] ->
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [< >] -> Ast.Variable id
-      in
-      parse_ident id stream
-
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [< 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
-      Ast.If (c, t, e)
-
-  (* forexpr
-        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [< 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream >] ->
-      begin parser
-        | [<
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream >] ->
-            let step =  (* the step expression is optional *)
-              begin parser
-              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
-              | [< >] -> None
-              end stream
-            in
-            begin parser
-            | [< 'Token.In; body=parse_expr >] ->
-                Ast.For (id, start, end_, step, body)
-            | [< >] ->
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [< >] ->  (* NOTE(review): '=' was already consumed above; this message looks stale *)
-            raise (Stream.Error "expected '=' after for")
-      end stream
-
-  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)*    -- only operators of precedence >= expr_prec are consumed *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec < expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Peek at the token that follows rhs to see whether it is another operator. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) ->
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec < next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -> rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -> lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')'    -- argument names are separated by whitespace only *)
-let parse_prototype =
-  let rec parse_args accumulator = parser  (* collects names in reverse order *)
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success: restore source order of the argument names. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression    -- yields Ast.Function (prototype, body) *)
-let parse_definition = parser
-  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression    -- wrapped in a function so it can be compiled like any other *)
-let parse_toplevel = parser
-  | [< e=parse_expr >] ->
-      (* Make an anonymous proto: empty name, no arguments. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype    -- yields the parsed prototype *)
-let parse_extern = parser
-  | [< 'Token.Extern; e=parse_prototype >] -> e
-
-
- -
codegen.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string  (* raised by the code generator for semantic errors *)
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"  (* module that receives every generated function *)
-let builder = builder context  (* shared instruction builder; its position is the current emission point *)
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10  (* variable name -> value currently in scope *)
-let double_type = double_type context  (* shadows Llvm.double_type with the concrete double type *)
-
-(* codegen_expr - Emit LLVM IR for an expression at the builder's current
- * insertion point and return the llvalue holding its result.  Every
- * Kaleidoscope value is a double.  Raises Error for an unknown variable or
- * function, an invalid binary operator, or a call with the wrong number of
- * arguments. *)
-let rec codegen_expr = function
-  | Ast.Number n -> const_float double_type n
-  | Ast.Variable name ->
-      (try Hashtbl.find named_values name with
-        | Not_found -> raise (Error "unknown variable name"))
-  | Ast.Binary (op, lhs, rhs) ->
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        (* Operands are doubles, so the floating-point instructions are
-         * required; the integer add/sub/mul forms are not valid on doubles. *)
-        match op with
-        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
-        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
-        | '<' ->
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -> raise (Error "invalid binary operator")
-      end
-  | Ast.Call (callee, args) ->
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* Reject a call whose argument count differs from the declaration. *)
-      if Array.length params <> Array.length args then
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-  | Ast.If (cond, then_, else_) ->
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing not equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-
-      (* Set a unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-  | Ast.For (var_name, start, end_, step, body) ->
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let preheader_bb = insertion_block builder in
-      let the_function = block_parent preheader_bb in
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Start the PHI node with an entry for start. *)
-      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-
-      (* Within the loop, the variable is defined equal to the PHI node.
-       * Hashtbl.add shadows any existing binding of the same name rather than
-       * replacing it, so an outer binding stays in the table underneath. *)
-      Hashtbl.add named_values var_name variable;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -> codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -> const_float double_type 1.0
-      in
-
-      (* The loop variable is a double, so advance it with a floating-point add. *)
-      let next_var = build_fadd variable step_val "nextvar" builder in
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Convert condition to a bool by comparing not equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
-      (* Create the "after loop" block and insert it. *)
-      let loop_end_bb = insertion_block builder in
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-
-      (* Add a new entry to the PHI node for the backedge. *)
-      add_incoming (next_var, loop_end_bb) variable;
-
-      (* Drop the loop variable's binding.  Hashtbl.remove pops only the most
-       * recent binding, which re-exposes a shadowed outer variable and
-       * otherwise leaves the name undefined after the loop. *)
-      Hashtbl.remove named_values var_name;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-(* codegen_proto - Declare the function named by a prototype, or reuse a compatible bodiless one. *)
-let codegen_proto = function
-  | Ast.Prototype (name, args) ->
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -> declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f ->
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f <> At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) <> ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Name every parameter after its prototype argument and bind it in named_values. *)
-      Array.iteri (fun i a ->
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-(* codegen_func - Emit, verify and optimize a whole function with the_fpm; returns the function. *)
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) ->
-      Hashtbl.clear named_values;  (* each function starts with an empty scope *)
-      let the_function = codegen_proto proto in
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e ->  (* body failed: remove the half-built function, then re-raise *)
-        delete_function the_function;  (* NOTE(review): also deletes a declaration that codegen_proto reused *)
-        raise e
-
-
- -
toplevel.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* main_loop - read-eval-print loop.  top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -> ()  (* end of input: stop *)
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') ->
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token ->
-      begin
-        try match token with
-        | Token.Def ->
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern ->
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ ->
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT and run the function with no arguments. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s ->
-          (* Skip token for error recovery, then report the message. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready> "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-
-
- -
toy.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-(* main - install operator precedences, build the JIT and optimizer, then run the interpreter loop. *)
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Print the first prompt and build the token stream over stdin. *)
-  print_string "ready> "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT and a function pass manager for the module. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-
-
- -
bindings.c:
-
-
-#include <stdio.h>
-
-/* putchard - write X, converted to char, to stdout via putchar; always returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-
-
- -Next: Extending the language: user-defined -operators -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- Erick Tryzelaar
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst new file mode 100644 index 000000000000..b8ae3c58ddff --- /dev/null +++ b/docs/tutorial/OCamlLangImpl5.rst @@ -0,0 +1,1362 @@ +================================================== +Kaleidoscope: Extending the Language: Control Flow +================================================== + +.. contents:: + :local: + +Chapter 5 Introduction +====================== + +Welcome to Chapter 5 of the "`Implementing a language with +LLVM `_" tutorial. Parts 1-4 described the implementation of +the simple Kaleidoscope language and included support for generating +LLVM IR, followed by optimizations and a JIT compiler. Unfortunately, as +presented, Kaleidoscope is mostly useless: it has no control flow other +than call and return. This means that you can't have conditional +branches in the code, significantly limiting its power. In this episode +of "build that compiler", we'll extend Kaleidoscope to have an +if/then/else expression plus a simple 'for' loop. + +If/Then/Else +============ + +Extending Kaleidoscope to support if/then/else is quite straightforward. +It basically requires adding lexer support for this "new" concept to the +lexer, parser, AST, and LLVM code emitter. This example is nice, because +it shows how easy it is to "grow" a language over time, incrementally +extending it as new ideas are discovered. + +Before we get going on "how" we add this extension, lets talk about +"what" we want. The basic idea is that we want to be able to write this +sort of thing: + +:: + + def fib(x) + if x < 3 then + 1 + else + fib(x-1)+fib(x-2); + +In Kaleidoscope, every construct is an expression: there are no +statements. As such, the if/then/else expression needs to return a value +like any other. Since we're using a mostly functional form, we'll have +it evaluate its conditional, then return the 'then' or 'else' value +based on how the condition was resolved. 
This is very similar to the C +"?:" expression. + +The semantics of the if/then/else expression is that it evaluates the +condition to a boolean equality value: 0.0 is considered to be false and +everything else is considered to be true. If the condition is true, the +first subexpression is evaluated and returned, if the condition is +false, the second subexpression is evaluated and returned. Since +Kaleidoscope allows side-effects, this behavior is important to nail +down. + +Now that we know what we "want", lets break this down into its +constituent pieces. + +Lexer Extensions for If/Then/Else +--------------------------------- + +The lexer extensions are straightforward. First we add new variants for +the relevant tokens: + +.. code-block:: ocaml + + (* control *) + | If | Then | Else | For | In + +Once we have that, we recognize the new keywords in the lexer. This is +pretty simple stuff: + +.. code-block:: ocaml + + ... + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | id -> [< 'Token.Ident id; stream >] + +AST Extensions for If/Then/Else +------------------------------- + +To represent the new expression we add a new AST variant for it: + +.. code-block:: ocaml + + type expr = + ... + (* variant for if/then/else. *) + | If of expr * expr * expr + +The AST variant just has pointers to the various subexpressions. + +Parser Extensions for If/Then/Else +---------------------------------- + +Now that we have the relevant tokens coming from the lexer and we have +the AST node to build, our parsing logic is relatively straightforward. +First we define a new parsing function: + +.. code-block:: ocaml + + let rec parse_primary = parser + ... 
+ (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + +Next we hook it up as a primary expression: + +.. code-block:: ocaml + + let rec parse_primary = parser + ... + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + +LLVM IR for If/Then/Else +------------------------ + +Now that we have it parsing and building the AST, the final piece is +adding LLVM code generation support. This is the most interesting part +of the if/then/else example, because this is where it starts to +introduce new concepts. All of the code above has been thoroughly +described in previous chapters. + +To motivate the code we want to produce, lets take a look at a simple +example. Consider: + +:: + + extern foo(); + extern bar(); + def baz(x) if x then foo() else bar(); + +If you disable optimizations, the code you'll (soon) get from +Kaleidoscope looks like this: + +.. code-block:: llvm + + declare double @foo() + + declare double @bar() + + define double @baz(double %x) { + entry: + %ifcond = fcmp one double %x, 0.000000e+00 + br i1 %ifcond, label %then, label %else + + then: ; preds = %entry + %calltmp = call double @foo() + br label %ifcont + + else: ; preds = %entry + %calltmp1 = call double @bar() + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ] + ret double %iftmp + } + +To visualize the control flow graph, you can use a nifty feature of the +LLVM '`opt `_' tool. If you put this LLVM +IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a +window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll +see this graph: + +.. 
figure:: LangImpl5-cfg.png + :align: center + :alt: Example CFG + + Example CFG + +Another way to get this is to call +"``Llvm_analysis.view_function_cfg f``" or +"``Llvm_analysis.view_function_cfg_only f``" (where ``f`` is a +"``Function``") either by inserting actual calls into the code and +recompiling or by calling these in the debugger. LLVM has many nice +features for visualizing various graphs. + +Getting back to the generated code, it is fairly simple: the entry block +evaluates the conditional expression ("x" in our case here) and compares +the result to 0.0 with the "``fcmp one``" instruction ('one' is "Ordered +and Not Equal"). Based on the result of this expression, the code jumps +to either the "then" or "else" blocks, which contain the expressions for +the true/false cases. + +Once the then/else blocks are finished executing, they both branch back +to the 'ifcont' block to execute the code that happens after the +if/then/else. In this case the only thing left to do is to return to the +caller of the function. The question then becomes: how does the code +know which expression to return? + +The answer to this question involves an important SSA operation: the +`Phi +operation `_. +If you're not familiar with SSA, `the wikipedia +article `_ +is a good introduction and there are various other introductions to it +available on your favorite search engine. The short version is that +"execution" of the Phi operation requires "remembering" which block +control came from. The Phi operation takes on the value corresponding to +the input control block. In this case, if control comes in from the +"then" block, it gets the value of "calltmp". If control comes from the +"else" block, it gets the value of "calltmp1". + +At this point, you are probably starting to think "Oh no! This means my +simple and elegant front-end will have to start generating SSA form in +order to use LLVM!". 
Fortunately, this is not the case, and we strongly +advise *not* implementing an SSA construction algorithm in your +front-end unless there is an amazingly good reason to do so. In +practice, there are two sorts of values that float around in code +written for your average imperative programming language that might need +Phi nodes: + +#. Code that involves user variables: ``x = 1; x = x + 1;`` +#. Values that are implicit in the structure of your AST, such as the + Phi node in this case. + +In `Chapter 7 `_ of this tutorial ("mutable +variables"), we'll talk about #1 in depth. For now, just believe me that +you don't need SSA construction to handle this case. For #2, you have +the choice of using the techniques that we will describe for #1, or you +can insert Phi nodes directly, if convenient. In this case, it is really +really easy to generate the Phi node, so we choose to do it directly. + +Okay, enough of the motivation and overview, lets generate code! + +Code Generation for If/Then/Else +-------------------------------- + +In order to generate code for this, we implement the ``Codegen`` method +for ``IfExprAST``: + +.. code-block:: ocaml + + let rec codegen_expr = function + ... + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + +This code is straightforward and similar to what we saw before. We emit +the expression for the condition, then compare that value to zero to get +a truth value as a 1-bit (bool) value. + +.. code-block:: ocaml + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. 
*) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + position_at_end then_bb builder; + +As opposed to the `C++ tutorial <LangImpl5.html>`_, we have to build our +basic blocks bottom up since we can't have dangling BasicBlocks. We +start off by saving a pointer to the first block (which might not be the +entry block), which we'll need to build a conditional branch later. We +do this by asking the ``builder`` for the current BasicBlock. The fourth +line gets the current Function object that is being built. It gets this +by asking the ``start_bb`` for its "parent" (the function it is currently +embedded into). + +Once it has that, it creates one block. It is automatically appended +into the function's list of blocks. + +.. code-block:: ocaml + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + +We move the builder to start inserting into the "then" block. Strictly +speaking, this call moves the insertion point to be at the end of the +specified block. However, since the "then" block is empty, it also +starts out by inserting at the beginning of the block. :) + +Once the insertion point is set, we recursively codegen the "then" +expression from the AST. + +The final line here is quite subtle, but is very important. The basic +issue is that when we create the Phi node in the merge block, we need to +set up the block/value pairs that indicate how the Phi will work. +Importantly, the Phi node expects to have an entry for each predecessor +of the block in the CFG. Why then, are we getting the current block when +we just set it to ``then_bb`` a few lines above? 
The problem is that the "Then" +expression may actually itself change the block that the Builder is +emitting into if, for example, it contains a nested "if/then/else" +expression. Because calling Codegen recursively could arbitrarily change +the notion of the current block, we are required to get an up-to-date +value for code that will set up the Phi node. + +.. code-block:: ocaml + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + +Code generation for the 'else' block is basically identical to codegen +for the 'then' block. + +.. code-block:: ocaml + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + +The first two lines here are now familiar: the first adds the "merge" +block to the Function object. The second line changes the insertion +point so that newly created code will go into the "merge" block. Once +that is done, we need to create the PHI node and set up the block/value +pairs for the PHI. + +.. code-block:: ocaml + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + +Once the blocks are created, we can emit the conditional branch that +chooses between them. Note that creating new blocks does not implicitly +affect the IRBuilder, so it is still inserting into the block that the +condition went into. This is why we needed to save the "start" block. + +.. code-block:: ocaml + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. 
*) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + +To finish off the blocks, we create an unconditional branch to the merge +block. One interesting (and very important) aspect of the LLVM IR is +that it `requires all basic blocks to be +"terminated" <../LangRef.html#functionstructure>`_ with a `control flow +instruction <../LangRef.html#terminators>`_ such as return or branch. +This means that all control flow, *including fall throughs* must be made +explicit in the LLVM IR. If you violate this rule, the verifier will +emit an error. + +Finally, the CodeGen function returns the phi node as the value computed +by the if/then/else expression. In our example above, this returned +value will feed into the code for the top-level function, which will +create the return instruction. + +Overall, we now have the ability to execute conditional code in +Kaleidoscope. With this extension, Kaleidoscope is a fairly complete +language that can calculate a wide variety of numeric functions. Next up +we'll add another useful expression that is familiar from non-functional +languages... + +'for' Loop Expression +===================== + +Now that we know how to add basic control flow constructs to the +language, we have the tools to add more powerful things. Lets add +something more aggressive, a 'for' expression: + +:: + + extern putchard(char); + def printstar(n) + for i = 1, i < n, 1.0 in + putchard(42); # ascii 42 = '*' + + # print 100 '*' characters + printstar(100); + +This expression defines a new variable ("i" in this case) which iterates +from a starting value, while the condition ("i < n" in this case) is +true, incrementing by an optional step value ("1.0" in this case). If +the step value is omitted, it defaults to 1.0. 
While the condition is true, +it executes its body expression. Because we don't have anything better +to return, we'll just define the loop as always returning 0.0. In the +future when we have mutable variables, it will get more useful. + +As before, let's talk about the changes that we need to Kaleidoscope to +support this. + +Lexer Extensions for the 'for' Loop +----------------------------------- + +The lexer extensions are the same sort of thing as for if/then/else: + +.. code-block:: ocaml + + ... in Token.token ... + (* control *) + | If | Then | Else + | For | In + + ... in Lexer.lex_ident... + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | id -> [< 'Token.Ident id; stream >] + +AST Extensions for the 'for' Loop +--------------------------------- + +The AST variant is just as simple. It basically boils down to capturing +the variable name and the constituent expressions in the node. + +.. code-block:: ocaml + + type expr = + ... + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + +Parser Extensions for the 'for' Loop +------------------------------------ + +The parser code is also fairly standard. The only interesting thing here +is handling of the optional step value. The parser code handles it by +checking to see if the second comma is present. If not, it sets the step +value to ``None`` in the AST node: + +.. code-block:: ocaml + + let rec parse_primary = parser + ... + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? 
"expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + +LLVM IR for the 'for' Loop +-------------------------- + +Now we get to the good part: the LLVM IR we want to generate for this +thing. With the simple example above, we get this LLVM IR (note that +this dump is generated with optimizations disabled for clarity): + +.. code-block:: llvm + + declare double @putchard(double) + + define double @printstar(double %n) { + entry: + ; initial value = 1.0 (inlined into phi) + br label %loop + + loop: ; preds = %loop, %entry + %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ] + ; body + %calltmp = call double @putchard(double 4.200000e+01) + ; increment + %nextvar = fadd double %i, 1.000000e+00 + + ; termination test + %cmptmp = fcmp ult double %i, %n + %booltmp = uitofp i1 %cmptmp to double + %loopcond = fcmp one double %booltmp, 0.000000e+00 + br i1 %loopcond, label %loop, label %afterloop + + afterloop: ; preds = %loop + ; loop always returns 0.0 + ret double 0.000000e+00 + } + +This loop contains all the same constructs we saw before: a phi node, +several expressions, and some basic blocks. Lets see how this fits +together. + +Code Generation for the 'for' Loop +---------------------------------- + +The first part of Codegen is very simple: we just output the start +expression for the loop value: + +.. code-block:: ocaml + + let rec codegen_expr = function + ... + | Ast.For (var_name, start, end_, step, body) -> + (* Emit the start code first, without 'variable' in scope. 
*) + let start_val = codegen_expr start in + +With this out of the way, the next step is to set up the LLVM basic +block for the start of the loop body. In the case above, the whole loop +body is one block, but remember that the body code itself could consist +of multiple blocks (e.g. if it contains an if/then/else or a for/in +expression). + +.. code-block:: ocaml + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let preheader_bb = insertion_block builder in + let the_function = block_parent preheader_bb in + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + +This code is similar to what we saw for if/then/else. Because we will +need it to create the Phi node, we remember the block that falls through +into the loop. Once we have that, we create the actual block that starts +the loop and create an unconditional branch for the fall-through between +the two blocks. + +.. code-block:: ocaml + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Start the PHI node with an entry for start. *) + let variable = build_phi [(start_val, preheader_bb)] var_name builder in + +Now that the "preheader" for the loop is set up, we switch to emitting +code for the loop body. To begin with, we move the insertion point and +create the PHI node for the loop induction variable. Since we already +know the incoming value for the starting value, we add it to the Phi +node. Note that the Phi will eventually get a second value for the +backedge, but we can't set it up yet (because it doesn't exist!). + +.. code-block:: ocaml + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. 
*) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name variable; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + +Now the code starts to get more interesting. Our 'for' loop introduces a +new variable to the symbol table. This means that our symbol table can +now contain either function arguments or loop variables. To handle this, +before we codegen the body of the loop, we add the loop variable as the +current value for its name. Note that it is possible that there is a +variable of the same name in the outer scope. It would be easy to make +this an error (emit an error and return null if there is already an +entry for VarName) but we choose to allow shadowing of variables. In +order to handle this correctly, we remember the Value that we are +potentially shadowing in ``old_val`` (which will be None if there is no +shadowed variable). + +Once the loop variable is set into the symbol table, the code +recursively codegen's the body. This allows the body to use the loop +variable: any references to it will naturally find it in the symbol +table. + +.. code-block:: ocaml + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + let next_var = build_add variable step_val "nextvar" builder in + +Now that the body is emitted, we compute the next value of the iteration +variable by adding the step value, or 1.0 if it isn't present. +'``next_var``' will be the value of the loop variable on the next +iteration of the loop. + +.. code-block:: ocaml + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Convert condition to a bool by comparing equal to 0.0. 
*) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + +Finally, we evaluate the exit value of the loop, to determine whether +the loop should exit. This mirrors the condition evaluation for the +if/then/else statement. + +.. code-block:: ocaml + + (* Create the "after loop" block and insert it. *) + let loop_end_bb = insertion_block builder in + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + +With the code for the body of the loop complete, we just need to finish +up the control flow for it. This code remembers the end block (for the +phi node), then creates the block for the loop exit ("afterloop"). Based +on the value of the exit condition, it creates a conditional branch that +chooses between executing the loop again and exiting the loop. Any +future code is emitted in the "afterloop" block, so it sets the +insertion position to it. + +.. code-block:: ocaml + + (* Add a new entry to the PHI node for the backedge. *) + add_incoming (next_var, loop_end_bb) variable; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + +The final code handles various cleanups: now that we have the +"``next_var``" value, we can add the incoming value to the loop PHI +node. After that, we remove the loop variable from the symbol table, so +that it isn't in scope after the for loop. Finally, code generation of +the for loop always returns 0.0, so that is what we return from +``Codegen.codegen_expr``. + +With this, we conclude the "adding control flow to Kaleidoscope" chapter +of the tutorial. 
In this chapter we added two control flow constructs, +and used them to motivate a couple of aspects of the LLVM IR that are +important for front-end implementors to know. In the next chapter of our +saga, we will get a bit crazier and add `user-defined +operators <OCamlLangImpl6.html>`_ to our poor innocent language. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +the if/then/else and for expressions. To build this example, use: + +.. code-block:: bash + + # Compile + ocamlbuild toy.byte + # Run + ./toy.byte + +Here is the code: + +\_tags: + :: + + <{lexer,parser}.ml>: use_camlp4, pp(camlp4of) + <*.{byte,native}>: g++, use_llvm, use_llvm_analysis + <*.{byte,native}>: use_llvm_executionengine, use_llvm_target + <*.{byte,native}>: use_llvm_scalar_opts, use_bindings + +myocamlbuild.ml: + .. code-block:: ocaml + + open Ocamlbuild_plugin;; + + ocaml_lib ~extern:true "llvm";; + ocaml_lib ~extern:true "llvm_analysis";; + ocaml_lib ~extern:true "llvm_executionengine";; + ocaml_lib ~extern:true "llvm_target";; + ocaml_lib ~extern:true "llvm_scalar_opts";; + + flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; + dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; + +token.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + + (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) + type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In + +lexer.ml: + .. 
code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + + let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + + and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + + and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | id -> [< 'Token.Ident id; stream >] + + and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] + +ast.ml: + .. 
code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + + (* expr - Base type for all expression nodes. *) + type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = Prototype of string * string array + + (* func - This type represents a function definition itself. *) + type func = Function of proto * expr + +parser.ml: + .. code-block:: ocaml + + (*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + + (* binop_precedence - This holds the precedence for each binary operator that is + * defined *) + let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + + (* precedence - Get the precedence of the pending binary operator token. *) + let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr *) + let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? 
"expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? "expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. 
*) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + + (* prototype + * ::= id '(' id* ')' *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + + (* definition ::= 'def' prototype expression *) + let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + + (* toplevelexpr ::= expression *) + let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. 
*) + Ast.Function (Ast.Prototype ("", [||]), e) + + (* external ::= 'extern' prototype *) + let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e + +codegen.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + + open Llvm + + exception Error of string + + let context = global_context () + let the_module = create_module context "my cool jit" + let builder = builder context + let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 + let double_type = double_type context + + let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_add lhs_val rhs_val "addtmp" builder + | '-' -> build_sub lhs_val rhs_val "subtmp" builder + | '*' -> build_mul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. 
*) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. 
*) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let preheader_bb = insertion_block builder in + let the_function = block_parent preheader_bb in + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Start the PHI node with an entry for start. *) + let variable = build_phi [(start_val, preheader_bb)] var_name builder in + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name variable; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + let next_var = build_add variable step_val "nextvar" builder in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Convert condition to a bool by comparing equal to 0.0. 
*) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let loop_end_bb = insertion_block builder in + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Add a new entry to the PHI node for the backedge. *) + add_incoming (next_var, loop_end_bb) variable; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + + let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + + let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. 
*) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e + +toplevel.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + + (* top ::= definition | external | expression | ';' *) + let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. 
*) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream + +toy.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + open Llvm_target + open Llvm_scalar_opts + + let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. 
*) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module + ;; + + main () + +bindings.c + .. code-block:: c + + #include <stdio.h> + + /* putchard - putchar that takes a double and returns 0. */ + extern double putchard(double X) { + putchar((char)X); + return 0; + } + +`Next: Extending the language: user-defined +operators <OCamlLangImpl6.html>`_ + diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html deleted file mode 100644 index db252406fed7..000000000000 --- a/docs/tutorial/OCamlLangImpl6.html +++ /dev/null @@ -1,1574 +0,0 @@ - - - - - Kaleidoscope: Extending the Language: User-defined Operators - - - - - - - - -

Kaleidoscope: Extending the Language: User-defined Operators

- - - -
-

- Written by Chris Lattner - and Erick Tryzelaar -

-
- - -

Chapter 6 Introduction

- - -
- -

Welcome to Chapter 6 of the "Implementing a language -with LLVM" tutorial. At this point in our tutorial, we now have a fully -functional language that is fairly minimal, but also useful. There -is still one big problem with it, however. Our language doesn't have many -useful operators (like division, logical negation, or even any comparisons -besides less-than).

- -

This chapter of the tutorial takes a wild digression into adding user-defined -operators to the simple and beautiful Kaleidoscope language. This digression now -gives us a simple and ugly language in some ways, but also a powerful one at the -same time. One of the great things about creating your own language is that you -get to decide what is good or bad. In this tutorial we'll assume that it is -okay to use this as a way to show some interesting parsing techniques.

- -

At the end of this tutorial, we'll run through an example Kaleidoscope -application that renders the Mandelbrot set. This gives -an example of what you can build with Kaleidoscope and its feature set.

- -
- - -

User-defined Operators: the Idea

- - -
- -

-The "operator overloading" that we will add to Kaleidoscope is more general than -in languages like C++. In C++, you are only allowed to redefine existing -operators: you can't programmatically change the grammar, introduce new -operators, change precedence levels, etc. In this chapter, we will add this -capability to Kaleidoscope, which will let the user round out the set of -operators that are supported.

- -

The point of going into user-defined operators in a tutorial like this is to -show the power and flexibility of using a hand-written parser. Thus far, the parser -we have been implementing uses recursive descent for most parts of the grammar and -operator precedence parsing for the expressions. See Chapter 2 for details. Without using operator -precedence parsing, it would be very difficult to allow the programmer to -introduce new operators into the grammar: the grammar is dynamically extensible -as the JIT runs.

- -

The two specific features we'll add are programmable unary operators (right -now, Kaleidoscope has no unary operators at all) as well as binary operators. -An example of this is:

- -
-
-# Logical unary not.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-
-# Define > with the same precedence as <.
-def binary> 10 (LHS RHS)
-  RHS < LHS;
-
-# Binary "logical or", (note that it does not "short circuit")
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Define = with slightly lower precedence than relationals.
-def binary= 9 (LHS RHS)
-  !(LHS < RHS | LHS > RHS);
-
-
- -

Many languages aspire to being able to implement their standard runtime -library in the language itself. In Kaleidoscope, we can implement significant -parts of the language in the library!

- -

We will break down implementation of these features into two parts: -implementing support for user-defined binary operators and adding unary -operators.

- -
- - -

User-defined Binary Operators

- - -
- -

Adding support for user-defined binary operators is pretty simple with our -current framework. We'll first add support for the unary/binary keywords:

- -
-
-type token =
-  ...
-  (* operators *)
-  | Binary | Unary
-
-...
-
-and lex_ident buffer = parser
-  ...
-      | "for" -> [< 'Token.For; stream >]
-      | "in" -> [< 'Token.In; stream >]
-      | "binary" -> [< 'Token.Binary; stream >]
-      | "unary" -> [< 'Token.Unary; stream >]
-
-
- -

This just adds lexer support for the unary and binary keywords, like we -did in previous chapters. One nice -thing about our current AST, is that we represent binary operators with full -generalisation by using their ASCII code as the opcode. For our extended -operators, we'll use this same representation, so we don't need any new AST or -parser support.

- -

On the other hand, we have to be able to represent the definitions of these -new operators, in the "def binary| 5" part of the function definition. In our -grammar so far, the "name" for the function definition is parsed as the -"prototype" production and into the Ast.Prototype AST node. To -represent our new user-defined operators as prototypes, we have to extend -the Ast.Prototype AST node like this:

- -
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
-  | Prototype of string * string array
-  | BinOpPrototype of string * string array * int
-
-
- -

Basically, in addition to knowing a name for the prototype, we now keep track -of whether it was an operator, and if it was, what precedence level the operator -is at. The precedence is only used for binary operators (as you'll see below, -it just doesn't apply for unary operators). Now that we have a way to represent -the prototype for a user-defined operator, we need to parse it:

- -
-
-(* prototype
- *   ::= id '(' id* ')'
- *   ::= binary LETTER number? (id, id)
- *   ::= unary LETTER number? (id) *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-  let parse_operator = parser
-    | [< 'Token.Unary >] -> "unary", 1
-    | [< 'Token.Binary >] -> "binary", 2
-  in
-  let parse_binary_precedence = parser
-    | [< 'Token.Number n >] -> int_of_float n
-    | [< >] -> 30
-  in
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  | [< (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-
- -

This is all fairly straightforward parsing code, and we have already seen -a lot of similar code in the past. One interesting part about the code above is -the couple lines that set up name for binary operators. This builds -names like "binary@" for a newly defined "@" operator. This then takes -advantage of the fact that symbol names in the LLVM symbol table are allowed to -have any character in them, including embedded nul characters.

- -

The next interesting thing to add, is codegen support for these binary -operators. Given our current structure, this is a simple addition of a default -case for our existing binary operator node:

- -
-
-let codegen_expr = function
-  ...
-  | Ast.Binary (op, lhs, rhs) ->
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
-        | '<' ->
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ ->
-            (* If it wasn't a builtin binary operator, it must be a user defined
-             * one. Emit a call to it. *)
-            let callee = "binary" ^ (String.make 1 op) in
-            let callee =
-              match lookup_function callee the_module with
-              | Some callee -> callee
-              | None -> raise (Error "binary operator not found!")
-            in
-            build_call callee [|lhs_val; rhs_val|] "binop" builder
-      end
-
-
- -

As you can see above, the new code is actually really simple. It just does -a lookup for the appropriate operator in the symbol table and generates a -function call to it. Since user-defined operators are just built as normal -functions (because the "prototype" boils down to a function with the right -name) everything falls into place.

- -

The final piece of code we are missing, is a bit of top level magic:

- -
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) ->
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* If this is an operator, install it. *)
-      begin match proto with
-      | Ast.BinOpPrototype (name, args, prec) ->
-          let op = name.[String.length name - 1] in
-          Hashtbl.add Parser.binop_precedence op prec;
-      | _ -> ()
-      end;
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-      ...
-
-
- -

Basically, before codegening a function, if it is a user-defined operator, we -register it in the precedence table. This allows the binary operator parsing -logic we already have in place to handle it. Since we are working on a -fully-general operator precedence parser, this is all we need to do to "extend -the grammar".

- -

Now we have useful user-defined binary operators. This builds a lot -on the previous framework we built for other operators. Adding unary operators -is a bit more challenging, because we don't have any framework for it yet - let's -see what it takes.

- -
- - -

User-defined Unary Operators

- - -
- -

Since we don't currently support unary operators in the Kaleidoscope -language, we'll need to add everything to support them. Above, we added simple -support for the 'unary' keyword to the lexer. In addition to that, we need an -AST node:

- -
-
-type expr =
-  ...
-  (* variant for a unary operator. *)
-  | Unary of char * expr
-  ...
-
-
- -

This AST node is very simple and obvious by now. It directly mirrors the -binary operator AST node, except that it only has one child. With this, we -need to add the parsing logic. Parsing a unary operator is pretty simple: we'll -add a new function to do it:

- -
-
-(* unary
- *   ::= primary
- *   ::= '!' unary *)
-and parse_unary = parser
-  (* If this is a unary operator, read it. *)
-  | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
-      Ast.Unary (op, operand)
-
-  (* If the current token is not an operator, it must be a primary expr. *)
-  | [< stream >] -> parse_primary stream
-
-
- -

The grammar we add is pretty straightforward here. If we see a unary -operator when parsing a primary operator, we eat the operator as a prefix and -parse the remaining piece as another unary operator. This allows us to handle -multiple unary operators (e.g. "!!x"). Note that unary operators can't have -ambiguous parses like binary operators can, so there is no need for precedence -information.

- -

The problem with this function is that we need to call parse_unary from -somewhere. To do this, we change previous callers of parse_primary to call -parse_unary instead:

- -
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-        ...
-        (* Parse the unary expression after the binary operator. *)
-        let rhs = parse_unary stream in
-        ...
-
-...
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
-
-
- -

With these two simple changes, we are now able to parse unary operators and build the -AST for them. Next up, we need to add parser support for prototypes, to parse -the unary operator prototype. We extend the binary operator code above -with:

- -
-
-(* prototype
- *   ::= id '(' id* ')'
- *   ::= binary LETTER number? (id, id)
- *   ::= unary LETTER number? (id) *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-  let parse_operator = parser
-    | [< 'Token.Unary >] -> "unary", 1
-    | [< 'Token.Binary >] -> "binary", 2
-  in
-  let parse_binary_precedence = parser
-    | [< 'Token.Number n >] -> int_of_float n
-    | [< >] -> 30
-  in
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  | [< (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-
- -

As with binary operators, we name unary operators with a name that includes -the operator character. This assists us at code generation time. Speaking of, -the final piece we need to add is codegen support for unary operators. It looks -like this:

- -
-
-let rec codegen_expr = function
-  ...
-  | Ast.Unary (op, operand) ->
-      let operand = codegen_expr operand in
-      let callee = "unary" ^ (String.make 1 op) in
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown unary operator")
-      in
-      build_call callee [|operand|] "unop" builder
-
-
- -

This code is similar to, but simpler than, the code for binary operators. It -is simpler primarily because it doesn't need to handle any predefined operators. -

- -
- - -

Kicking the Tires

- - -
- -

It is somewhat hard to believe, but with a few simple extensions we've -covered in the last chapters, we have grown a real-ish language. With this, we -can do a lot of interesting things, including I/O, math, and a bunch of other -things. For example, we can now add a nice sequencing operator (printd is -defined to print out the specified value and a newline):

- -
-
-ready> extern printd(x);
-Read extern: declare double @printd(double)
-ready> def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.
-..
-ready> printd(123) : printd(456) : printd(789);
-123.000000
-456.000000
-789.000000
-Evaluated to 0.000000
-
-
- -

We can also define a bunch of other "primitive" operations, such as:

- -
-
-# Logical unary not.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-
-# Unary negate.
-def unary-(v)
-  0-v;
-
-# Define > with the same precedence as <.
-def binary> 10 (LHS RHS)
-  RHS < LHS;
-
-# Binary logical or, which does not short circuit.
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Binary logical and, which does not short circuit.
-def binary& 6 (LHS RHS)
-  if !LHS then
-    0
-  else
-    !!RHS;
-
-# Define = with slightly lower precedence than relationals.
-def binary = 9 (LHS RHS)
-  !(LHS < RHS | LHS > RHS);
-
-
-
- - -

Given the previous if/then/else support, we can also define interesting -functions for I/O. For example, the following prints out a character whose -"density" reflects the value passed in: the lower the value, the denser the -character:

- -
-
-ready>
-
-extern putchard(char)
-def printdensity(d)
-  if d > 8 then
-    putchard(32)  # ' '
-  else if d > 4 then
-    putchard(46)  # '.'
-  else if d > 2 then
-    putchard(43)  # '+'
-  else
-    putchard(42); # '*'
-...
-ready> printdensity(1): printdensity(2): printdensity(3) :
-          printdensity(4): printdensity(5): printdensity(9): putchard(10);
-*++..
-Evaluated to 0.000000
-
-
- -

Based on these simple primitive operations, we can start to define more -interesting things. For example, here's a little function that solves for the -number of iterations it takes a function in the complex plane to -converge:

- -
-
-# determine whether the specific location diverges.
-# Solve for z = z^2 + c in the complex plane.
-def mandleconverger(real imag iters creal cimag)
-  if iters > 255 | (real*real + imag*imag > 4) then
-    iters
-  else
-    mandleconverger(real*real - imag*imag + creal,
-                    2*real*imag + cimag,
-                    iters+1, creal, cimag);
-
-# return the number of iterations required for the iteration to escape
-def mandleconverge(real imag)
-  mandleconverger(real, imag, 0, real, imag);
-
-
- -

This "z = z^2 + c" function is a beautiful little creature that is the basis -for computation of the Mandelbrot Set. Our -mandleconverge function returns the number of iterations that it takes -for a complex orbit to escape, saturating to 255. This is not a very useful -function by itself, but if you plot its value over a two-dimensional plane, -you can see the Mandelbrot set. Given that we are limited to using putchard -here, our amazing graphical output is limited, but we can whip together -something using the density plotter above:

- -
-
-# compute and plot the mandlebrot set with the specified 2 dimensional range
-# info.
-def mandelhelp(xmin xmax xstep   ymin ymax ystep)
-  for y = ymin, y < ymax, ystep in (
-    (for x = xmin, x < xmax, xstep in
-       printdensity(mandleconverge(x,y)))
-    : putchard(10)
-  )
-
-# mandel - This is a convenient helper function for plotting the mandelbrot set
-# from the specified position with the specified Magnification.
-def mandel(realstart imagstart realmag imagmag)
-  mandelhelp(realstart, realstart+realmag*78, realmag,
-             imagstart, imagstart+imagmag*40, imagmag);
-
-
- -

Given this, we can try plotting out the Mandelbrot set! Let's try it out:

- -
-
-ready> mandel(-2.3, -1.3, 0.05, 0.07);
-*******************************+++++++++++*************************************
-*************************+++++++++++++++++++++++*******************************
-**********************+++++++++++++++++++++++++++++****************************
-*******************+++++++++++++++++++++.. ...++++++++*************************
-*****************++++++++++++++++++++++.... ...+++++++++***********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-**************+++++++++++++++++++++++....     ....+++++++++********************
-*************++++++++++++++++++++++......      .....++++++++*******************
-************+++++++++++++++++++++.......       .......+++++++******************
-***********+++++++++++++++++++....                ... .+++++++*****************
-**********+++++++++++++++++.......                     .+++++++****************
-*********++++++++++++++...........                    ...+++++++***************
-********++++++++++++............                      ...++++++++**************
-********++++++++++... ..........                        .++++++++**************
-*******+++++++++.....                                   .+++++++++*************
-*******++++++++......                                  ..+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******+++++......                                     ..+++++++++*************
-*******.... ....                                      ...+++++++++*************
-*******.... .                                         ...+++++++++*************
-*******+++++......                                    ...+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******++++++++......                                   .+++++++++*************
-*******+++++++++.....                                  ..+++++++++*************
-********++++++++++... ..........                        .++++++++**************
-********++++++++++++............                      ...++++++++**************
-*********++++++++++++++..........                     ...+++++++***************
-**********++++++++++++++++........                     .+++++++****************
-**********++++++++++++++++++++....                ... ..+++++++****************
-***********++++++++++++++++++++++.......       .......++++++++*****************
-************+++++++++++++++++++++++......      ......++++++++******************
-**************+++++++++++++++++++++++....      ....++++++++********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-*****************++++++++++++++++++++++....  ...++++++++***********************
-*******************+++++++++++++++++++++......++++++++*************************
-*********************++++++++++++++++++++++.++++++++***************************
-*************************+++++++++++++++++++++++*******************************
-******************************+++++++++++++************************************
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-Evaluated to 0.000000
-ready> mandel(-2, -1, 0.02, 0.04);
-**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
-***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
-*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
-*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
-***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
-**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
-************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
-***********++++++++++++++++++++++++++++++++++++++++++++++++++........        .
-**********++++++++++++++++++++++++++++++++++++++++++++++.............
-********+++++++++++++++++++++++++++++++++++++++++++..................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-******+++++++++++++++++++++++++++++++++++...........................
-*****++++++++++++++++++++++++++++++++............................
-*****++++++++++++++++++++++++++++...............................
-****++++++++++++++++++++++++++......   .........................
-***++++++++++++++++++++++++.........     ......    ...........
-***++++++++++++++++++++++............
-**+++++++++++++++++++++..............
-**+++++++++++++++++++................
-*++++++++++++++++++.................
-*++++++++++++++++............ ...
-*++++++++++++++..............
-*+++....++++................
-*..........  ...........
-*
-*..........  ...........
-*+++....++++................
-*++++++++++++++..............
-*++++++++++++++++............ ...
-*++++++++++++++++++.................
-**+++++++++++++++++++................
-**+++++++++++++++++++++..............
-***++++++++++++++++++++++............
-***++++++++++++++++++++++++.........     ......    ...........
-****++++++++++++++++++++++++++......   .........................
-*****++++++++++++++++++++++++++++...............................
-*****++++++++++++++++++++++++++++++++............................
-******+++++++++++++++++++++++++++++++++++...........................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-********+++++++++++++++++++++++++++++++++++++++++++..................
-Evaluated to 0.000000
-ready> mandel(-0.9, -1.4, 0.02, 0.03);
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-**********+++++++++++++++++++++************************************************
-*+++++++++++++++++++++++++++++++++++++++***************************************
-+++++++++++++++++++++++++++++++++++++++++++++**********************************
-++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
-++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
-+++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
-+++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
-+++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
-++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
-+++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
-++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
-++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
-+++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
-++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
-++++++++++++++++++++...........                .........++++++++++++++++++++++*
-++++++++++++++++++............                  ...........++++++++++++++++++++
-++++++++++++++++...............                 .............++++++++++++++++++
-++++++++++++++.................                 ...............++++++++++++++++
-++++++++++++..................                  .................++++++++++++++
-+++++++++..................                      .................+++++++++++++
-++++++........        .                               .........  ..++++++++++++
-++............                                         ......    ....++++++++++
-..............                                                    ...++++++++++
-..............                                                    ....+++++++++
-..............                                                    .....++++++++
-.............                                                    ......++++++++
-...........                                                     .......++++++++
-.........                                                       ........+++++++
-.........                                                       ........+++++++
-.........                                                           ....+++++++
-........                                                             ...+++++++
-.......                                                              ...+++++++
-                                                                    ....+++++++
-                                                                   .....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-Evaluated to 0.000000
-ready> ^D
-
-
- -

At this point, you may be starting to realize that Kaleidoscope is a real -and powerful language. It may not be self-similar :), but it can be used to -plot things that are!

- -

With this, we conclude the "adding user-defined operators" chapter of the -tutorial. We have successfully augmented our language, adding the ability to -extend the language in the library, and we have shown how this can be used to -build a simple but interesting end-user application in Kaleidoscope. At this -point, Kaleidoscope can build a variety of applications that are functional and -can call functions with side-effects, but it can't actually define and mutate a -variable itself.

- -

Strikingly, variable mutation is an important feature of some -languages, and it is not at all obvious how to add -support for mutable variables without having to add an "SSA construction" -phase to your front-end. In the next chapter, we will describe how you can -add variable mutation without building SSA in your front-end.

- -
- - - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with -support for user-defined operators. To build this example, use: -

- -
-
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-
-
- -

Here is the code:

- -
-
_tags:
-
-
-<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
-<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
-<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
-<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
-
-
- -
myocamlbuild.ml:
-
-
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-
-
- -
token.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns 'Kwd' for an unknown character, otherwise one of the
- * other tokens for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-  (* control *)
-  | If | Then | Else
-  | For | In
-
-  (* operators *)
-  | Binary | Unary
-
-
- -
lexer.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [< ' ('0' .. '9' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [< ' ('#'); stream >] ->
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [< 'c; stream >] ->
-      [< 'Token.Kwd c; lex stream >]
-
-  (* end of stream. *)
-  | [< >] -> [< >]
-
-and lex_number buffer = parser
-  | [< ' ('0' .. '9' | '.' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [< stream=lex >] ->
-      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
-
-and lex_ident buffer = parser
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [< stream=lex >] ->
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | "if" -> [< 'Token.If; stream >]
-      | "then" -> [< 'Token.Then; stream >]
-      | "else" -> [< 'Token.Else; stream >]
-      | "for" -> [< 'Token.For; stream >]
-      | "in" -> [< 'Token.In; stream >]
-      | "binary" -> [< 'Token.Binary; stream >]
-      | "unary" -> [< 'Token.Unary; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-and lex_comment = parser
-  | [< ' ('\n'); stream=lex >] -> stream
-  | [< 'c; e=lex_comment >] -> e
-  | [< >] -> [< >]
-
-
- -
ast.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a unary operator. *)
-  | Unary of char * expr
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-  (* variant for if/then/else. *)
-  | If of expr * expr * expr
-
-  (* variant for for/in. *)
-  | For of string * expr * expr * expr option * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
-  | Prototype of string * string array
-  | BinOpPrototype of string * string array * int
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-
-
- -
parser.ml:
-
-
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [< 'Token.Number n >] -> Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [< 'Token.Ident id; stream >] ->
-      let rec parse_args accumulator = parser
-        | [< e=parse_expr; stream >] ->
-            begin parser
-              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
-              | [< >] -> e :: accumulator
-            end stream
-        | [< >] -> accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [< 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'">] ->
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [< >] -> Ast.Variable id
-      in
-      parse_ident id stream
-
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [< 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
-      Ast.If (c, t, e)
-
-  (* forexpr
-        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [< 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream >] ->
-      begin parser
-        | [<
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream >] ->
-            let step =
-              begin parser
-              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
-              | [< >] -> None
-              end stream
-            in
-            begin parser
-            | [< 'Token.In; body=parse_expr >] ->
-                Ast.For (id, start, end_, step, body)
-            | [< >] ->
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [< >] ->
-            raise (Stream.Error "expected '=' after for")
-      end stream
-
-  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
-
-(* unary
- *   ::= primary
- *   ::= '!' unary *)
-and parse_unary = parser
-  (* If this is a unary operator, read it. *)
-  | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
-      Ast.Unary (op, operand)
-
-  (* If the current token is not an operator, it must be a primary expr. *)
-  | [< stream >] -> parse_primary stream
-
-(* binoprhs
- *   ::= ('+' unary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec < expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the unary expression after the binary operator. *)
-        let rhs = parse_unary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) ->
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec < next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -> rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -> lhs
-
-(* expression
- *   ::= unary binoprhs *)
-and parse_expr = parser
-  | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')'
- *   ::= binary LETTER number? (id, id)
- *   ::= unary LETTER number? (id) *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-  let parse_operator = parser
-    | [< 'Token.Unary >] -> "unary", 1
-    | [< 'Token.Binary >] -> "binary", 2
-  in
-  let parse_binary_precedence = parser
-    | [< 'Token.Number n >] -> int_of_float n
-    | [< >] -> 30
-  in
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  | [< (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [< e=parse_expr >] ->
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [< 'Token.Extern; e=parse_prototype >] -> e
-
-
- -
codegen.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
-  | Ast.Number n -> const_float double_type n
-  | Ast.Variable name ->
-      (try Hashtbl.find named_values name with
-        | Not_found -> raise (Error "unknown variable name"))
-  | Ast.Unary (op, operand) ->
-      let operand = codegen_expr operand in
-      let callee = "unary" ^ (String.make 1 op) in
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown unary operator")
-      in
-      build_call callee [|operand|] "unop" builder
-  | Ast.Binary (op, lhs, rhs) ->
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
-        | '<' ->
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ ->
-            (* If it wasn't a builtin binary operator, it must be a user defined
-             * one. Emit a call to it. *)
-            let callee = "binary" ^ (String.make 1 op) in
-            let callee =
-              match lookup_function callee the_module with
-              | Some callee -> callee
-              | None -> raise (Error "binary operator not found!")
-            in
-            build_call callee [|lhs_val; rhs_val|] "binop" builder
-      end
-  | Ast.Call (callee, args) ->
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-  | Ast.If (cond, then_, else_) ->
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing non-equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-
-      (* Set an unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-  | Ast.For (var_name, start, end_, step, body) ->
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let preheader_bb = insertion_block builder in
-      let the_function = block_parent preheader_bb in
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Start the PHI node with an entry for start. *)
-      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -> None
-      in
-      Hashtbl.add named_values var_name variable;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -> codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -> const_float double_type 1.0
-      in
-
-      let next_var = build_add variable step_val "nextvar" builder in
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Convert condition to a bool by comparing non-equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
-      (* Create the "after loop" block and insert it. *)
-      let loop_end_bb = insertion_block builder in
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-
-      (* Add a new entry to the PHI node for the backedge. *)
-      add_incoming (next_var, loop_end_bb) variable;
-
-      (* Restore the unshadowed variable. *)
-      begin match old_val with
-      | Some old_val -> Hashtbl.add named_values var_name old_val
-      | None -> ()
-      end;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -> declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f ->
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f <> At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) <> ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a ->
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) ->
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* If this is an operator, install it. *)
-      begin match proto with
-      | Ast.BinOpPrototype (name, args, prec) ->
-          let op = name.[String.length name - 1] in
-          Hashtbl.add Parser.binop_precedence op prec;
-      | _ -> ()
-      end;
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e ->
-        delete_function the_function;
-        raise e
-
-
- -
toplevel.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -> ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') ->
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token ->
-      begin
-        try match token with
-        | Token.Def ->
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern ->
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ ->
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s ->
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready> "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-
-
- -
toy.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready> "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-
-
- -
bindings.c
-
-
-#include <stdio.h>
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-/* printd - printf that takes a double and prints it as "%f\n", returning 0. */
-extern double printd(double X) {
-  printf("%f\n", X);
-  return 0;
-}
-
-
-
- -Next: Extending the language: mutable variables / -SSA construction -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- Erick Tryzelaar
- The LLVM Compiler Infrastructure
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/OCamlLangImpl6.rst b/docs/tutorial/OCamlLangImpl6.rst new file mode 100644 index 000000000000..36bffa8e9696 --- /dev/null +++ b/docs/tutorial/OCamlLangImpl6.rst @@ -0,0 +1,1441 @@ +============================================================ +Kaleidoscope: Extending the Language: User-defined Operators +============================================================ + +.. contents:: + :local: + +Chapter 6 Introduction +====================== + +Welcome to Chapter 6 of the "`Implementing a language with +LLVM `_" tutorial. At this point in our tutorial, we now +have a fully functional language that is fairly minimal, but also +useful. There is still one big problem with it, however. Our language +doesn't have many useful operators (like division, logical negation, or +even any comparisons besides less-than). + +This chapter of the tutorial takes a wild digression into adding +user-defined operators to the simple and beautiful Kaleidoscope +language. This digression now gives us a simple and ugly language in +some ways, but also a powerful one at the same time. One of the great +things about creating your own language is that you get to decide what +is good or bad. In this tutorial we'll assume that it is okay to use +this as a way to show some interesting parsing techniques. + +At the end of this tutorial, we'll run through an example Kaleidoscope +application that `renders the Mandelbrot set <#example>`_. This gives an +example of what you can build with Kaleidoscope and its feature set. + +User-defined Operators: the Idea +================================ + +The "operator overloading" that we will add to Kaleidoscope is more +general than languages like C++. In C++, you are only allowed to +redefine existing operators: you can't programatically change the +grammar, introduce new operators, change precedence levels, etc. 
In this +chapter, we will add this capability to Kaleidoscope, which will let the +user round out the set of operators that are supported. + +The point of going into user-defined operators in a tutorial like this +is to show the power and flexibility of using a hand-written parser. +Thus far, the parser we have been implementing uses recursive descent +for most parts of the grammar and operator precedence parsing for the +expressions. See `Chapter 2 `_ for details. Without +using operator precedence parsing, it would be very difficult to allow +the programmer to introduce new operators into the grammar: the grammar +is dynamically extensible as the JIT runs. + +The two specific features we'll add are programmable unary operators +(right now, Kaleidoscope has no unary operators at all) as well as +binary operators. An example of this is: + +:: + + # Logical unary not. + def unary!(v) + if v then + 0 + else + 1; + + # Define > with the same precedence as <. + def binary> 10 (LHS RHS) + RHS < LHS; + + # Binary "logical or", (note that it does not "short circuit") + def binary| 5 (LHS RHS) + if LHS then + 1 + else if RHS then + 1 + else + 0; + + # Define = with slightly lower precedence than relationals. + def binary= 9 (LHS RHS) + !(LHS < RHS | LHS > RHS); + +Many languages aspire to being able to implement their standard runtime +library in the language itself. In Kaleidoscope, we can implement +significant parts of the language in the library! + +We will break down implementation of these features into two parts: +implementing support for user-defined binary operators and adding unary +operators. + +User-defined Binary Operators +============================= + +Adding support for user-defined binary operators is pretty simple with +our current framework. We'll first add support for the unary/binary +keywords: + +.. code-block:: ocaml + + type token = + ... + (* operators *) + | Binary | Unary + + ... + + and lex_ident buffer = parser + ... 
+ | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + +This just adds lexer support for the unary and binary keywords, like we +did in `previous chapters `_. One nice +thing about our current AST, is that we represent binary operators with +full generalisation by using their ASCII code as the opcode. For our +extended operators, we'll use this same representation, so we don't need +any new AST or parser support. + +On the other hand, we have to be able to represent the definitions of +these new operators, in the "def binary\| 5" part of the function +definition. In our grammar so far, the "name" for the function +definition is parsed as the "prototype" production and into the +``Ast.Prototype`` AST node. To represent our new user-defined operators +as prototypes, we have to extend the ``Ast.Prototype`` AST node like +this: + +.. code-block:: ocaml + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = + | Prototype of string * string array + | BinOpPrototype of string * string array * int + +Basically, in addition to knowing a name for the prototype, we now keep +track of whether it was an operator, and if it was, what precedence +level the operator is at. The precedence is only used for binary +operators (as you'll see below, it just doesn't apply for unary +operators). Now that we have a way to represent the prototype for a +user-defined operator, we need to parse it: + +.. code-block:: ocaml + + (* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? 
(id) *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator. *) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +This is all fairly straightforward parsing code, and we have already +seen a lot of similar code in the past. One interesting part about the +code above is the couple lines that set up ``name`` for binary +operators. This builds names like "binary@" for a newly defined "@" +operator. This then takes advantage of the fact that symbol names in the +LLVM symbol table are allowed to have any character in them, including +embedded nul characters. + +The next interesting thing to add, is codegen support for these binary +operators. Given our current structure, this is a simple addition of a +default case for our existing binary operator node: + +.. 
code-block:: ocaml + + let codegen_expr = function + ... + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_add lhs_val rhs_val "addtmp" builder + | '-' -> build_sub lhs_val rhs_val "subtmp" builder + | '*' -> build_mul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> + (* If it wasn't a builtin binary operator, it must be a user defined + * one. Emit a call to it. *) + let callee = "binary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "binary operator not found!") + in + build_call callee [|lhs_val; rhs_val|] "binop" builder + end + +As you can see above, the new code is actually really simple. It just +does a lookup for the appropriate operator in the symbol table and +generates a function call to it. Since user-defined operators are just +built as normal functions (because the "prototype" boils down to a +function with the right name) everything falls into place. + +The final piece of code we are missing, is a bit of top level magic: + +.. code-block:: ocaml + + let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* If this is an operator, install it. *) + begin match proto with + | Ast.BinOpPrototype (name, args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add Parser.binop_precedence op prec; + | _ -> () + end; + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + ... + +Basically, before codegening a function, if it is a user-defined +operator, we register it in the precedence table. 
This allows the binary +operator parsing logic we already have in place to handle it. Since we +are working on a fully-general operator precedence parser, this is all +we need to do to "extend the grammar". + +Now we have useful user-defined binary operators. This builds a lot on +the previous framework we built for other operators. Adding unary +operators is a bit more challenging, because we don't have any framework +for it yet - let's see what it takes. + +User-defined Unary Operators +============================ + +Since we don't currently support unary operators in the Kaleidoscope +language, we'll need to add everything to support them. Above, we added +simple support for the 'unary' keyword to the lexer. In addition to +that, we need an AST node: + +.. code-block:: ocaml + + type expr = + ... + (* variant for a unary operator. *) + | Unary of char * expr + ... + +This AST node is very simple and obvious by now. It directly mirrors the +binary operator AST node, except that it only has one child. With this, +we need to add the parsing logic. Parsing a unary operator is pretty +simple: we'll add a new function to do it: + +.. code-block:: ocaml + + (* unary + * ::= primary + * ::= '!' unary *) + and parse_unary = parser + (* If this is a unary operator, read it. *) + | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> + Ast.Unary (op, operand) + + (* If the current token is not an operator, it must be a primary expr. *) + | [< stream >] -> parse_primary stream + +The grammar we add is pretty straightforward here. If we see a unary +operator when parsing a primary operator, we eat the operator as a +prefix and parse the remaining piece as another unary operator. This +allows us to handle multiple unary operators (e.g. "!!x"). Note that +unary operators can't have ambiguous parses like binary operators can, +so there is no need for precedence information. + +The problem with this function is that we need to call ``parse_unary`` from +somewhere.
To do this, we change previous callers of ``parse_primary`` to +call ``parse_unary`` instead: + +.. code-block:: ocaml + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + ... + (* Parse the unary expression after the binary operator. *) + let rhs = parse_unary stream in + ... + + ... + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream + +With these two simple changes, we are now able to parse unary operators +and build the AST for them. Next up, we need to add parser support for +prototypes, to parse the unary operator prototype. We extend the binary +operator code above with: + +.. code-block:: ocaml + + (* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? (id) *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator.
*) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +As with binary operators, we name unary operators with a name that +includes the operator character. This assists us at code generation +time. Speaking of, the final piece we need to add is codegen support for +unary operators. It looks like this: + +.. code-block:: ocaml + + let rec codegen_expr = function + ... + | Ast.Unary (op, operand) -> + let operand = codegen_expr operand in + let callee = "unary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown unary operator") + in + build_call callee [|operand|] "unop" builder + +This code is similar to, but simpler than, the code for binary +operators. It is simpler primarily because it doesn't need to handle any +predefined operators. + +Kicking the Tires +================= + +It is somewhat hard to believe, but with a few simple extensions we've +covered in the last chapters, we have grown a real-ish language. With +this, we can do a lot of interesting things, including I/O, math, and a +bunch of other things. For example, we can now add a nice sequencing +operator (printd is defined to print out the specified value and a +newline): + +:: + + ready> extern printd(x); + Read extern: declare double @printd(double) + ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands. + .. + ready> printd(123) : printd(456) : printd(789); + 123.000000 + 456.000000 + 789.000000 + Evaluated to 0.000000 + +We can also define a bunch of other "primitive" operations, such as: + +:: + + # Logical unary not. + def unary!(v) + if v then + 0 + else + 1; + + # Unary negate. 
+ def unary-(v) + 0-v; + + # Define > with the same precedence as <. + def binary> 10 (LHS RHS) + RHS < LHS; + + # Binary logical or, which does not short circuit. + def binary| 5 (LHS RHS) + if LHS then + 1 + else if RHS then + 1 + else + 0; + + # Binary logical and, which does not short circuit. + def binary& 6 (LHS RHS) + if !LHS then + 0 + else + !!RHS; + + # Define = with slightly lower precedence than relationals. + def binary = 9 (LHS RHS) + !(LHS < RHS | LHS > RHS); + +Given the previous if/then/else support, we can also define interesting +functions for I/O. For example, the following prints out a character +whose "density" reflects the value passed in: the lower the value, the +denser the character: + +:: + + ready> + + extern putchard(char) + def printdensity(d) + if d > 8 then + putchard(32) # ' ' + else if d > 4 then + putchard(46) # '.' + else if d > 2 then + putchard(43) # '+' + else + putchard(42); # '*' + ... + ready> printdensity(1): printdensity(2): printdensity(3) : + printdensity(4): printdensity(5): printdensity(9): putchard(10); + *++.. + Evaluated to 0.000000 + +Based on these simple primitive operations, we can start to define more +interesting things. For example, here's a little function that solves +for the number of iterations it takes a function in the complex plane to +converge: + +:: + + # determine whether the specific location diverges. + # Solve for z = z^2 + c in the complex plane. + def mandleconverger(real imag iters creal cimag) + if iters > 255 | (real*real + imag*imag > 4) then + iters + else + mandleconverger(real*real - imag*imag + creal, + 2*real*imag + cimag, + iters+1, creal, cimag); + + # return the number of iterations required for the iteration to escape + def mandleconverge(real imag) + mandleconverger(real, imag, 0, real, imag); + +This "z = z\ :sup:`2`\ + c" function is a beautiful little creature +that is the basis for computation of the `Mandelbrot +Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_.
Our +``mandleconverge`` function returns the number of iterations that it +takes for a complex orbit to escape, saturating to 255. This is not a +very useful function by itself, but if you plot its value over a +two-dimensional plane, you can see the Mandelbrot set. Given that we are +limited to using putchard here, our amazing graphical output is limited, +but we can whip together something using the density plotter above: + +:: + + # compute and plot the mandlebrot set with the specified 2 dimensional range + # info. + def mandelhelp(xmin xmax xstep ymin ymax ystep) + for y = ymin, y < ymax, ystep in ( + (for x = xmin, x < xmax, xstep in + printdensity(mandleconverge(x,y))) + : putchard(10) + ) + + # mandel - This is a convenient helper function for plotting the mandelbrot set + # from the specified position with the specified Magnification. + def mandel(realstart imagstart realmag imagmag) + mandelhelp(realstart, realstart+realmag*78, realmag, + imagstart, imagstart+imagmag*40, imagmag); + +Given this, we can try plotting out the Mandelbrot set! Let's try it out: + +:: + + ready> mandel(-2.3, -1.3, 0.05, 0.07); + *******************************+++++++++++************************************* + *************************+++++++++++++++++++++++******************************* + **********************+++++++++++++++++++++++++++++**************************** + *******************+++++++++++++++++++++.. ...++++++++************************* + *****************++++++++++++++++++++++.... ...+++++++++*********************** + ***************+++++++++++++++++++++++..... ...+++++++++********************* + **************+++++++++++++++++++++++.... ....+++++++++******************** + *************++++++++++++++++++++++...... .....++++++++******************* + ************+++++++++++++++++++++....... .......+++++++****************** + ***********+++++++++++++++++++.... ... .+++++++***************** + **********+++++++++++++++++.......
.+++++++**************** + *********++++++++++++++........... ...+++++++*************** + ********++++++++++++............ ...++++++++************** + ********++++++++++... .......... .++++++++************** + *******+++++++++..... .+++++++++************* + *******++++++++...... ..+++++++++************* + *******++++++....... ..+++++++++************* + *******+++++...... ..+++++++++************* + *******.... .... ...+++++++++************* + *******.... . ...+++++++++************* + *******+++++...... ...+++++++++************* + *******++++++....... ..+++++++++************* + *******++++++++...... .+++++++++************* + *******+++++++++..... ..+++++++++************* + ********++++++++++... .......... .++++++++************** + ********++++++++++++............ ...++++++++************** + *********++++++++++++++.......... ...+++++++*************** + **********++++++++++++++++........ .+++++++**************** + **********++++++++++++++++++++.... ... ..+++++++**************** + ***********++++++++++++++++++++++....... .......++++++++***************** + ************+++++++++++++++++++++++...... ......++++++++****************** + **************+++++++++++++++++++++++.... ....++++++++******************** + ***************+++++++++++++++++++++++..... ...+++++++++********************* + *****************++++++++++++++++++++++.... 
...++++++++*********************** + *******************+++++++++++++++++++++......++++++++************************* + *********************++++++++++++++++++++++.++++++++*************************** + *************************+++++++++++++++++++++++******************************* + ******************************+++++++++++++************************************ + ******************************************************************************* + ******************************************************************************* + ******************************************************************************* + Evaluated to 0.000000 + ready> mandel(-2, -1, 0.02, 0.04); + **************************+++++++++++++++++++++++++++++++++++++++++++++++++++++ + ***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++. + *******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++... + *****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++..... + ***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........ + **************++++++++++++++++++++++++++++++++++++++++++++++++++++++........... + ************+++++++++++++++++++++++++++++++++++++++++++++++++++++.............. + ***********++++++++++++++++++++++++++++++++++++++++++++++++++........ . + **********++++++++++++++++++++++++++++++++++++++++++++++............. + ********+++++++++++++++++++++++++++++++++++++++++++.................. + *******+++++++++++++++++++++++++++++++++++++++....................... + ******+++++++++++++++++++++++++++++++++++........................... + *****++++++++++++++++++++++++++++++++............................ + *****++++++++++++++++++++++++++++............................... + ****++++++++++++++++++++++++++...... ......................... + ***++++++++++++++++++++++++......... ...... ........... + ***++++++++++++++++++++++............ 
+ **+++++++++++++++++++++.............. + **+++++++++++++++++++................ + *++++++++++++++++++................. + *++++++++++++++++............ ... + *++++++++++++++.............. + *+++....++++................ + *.......... ........... + * + *.......... ........... + *+++....++++................ + *++++++++++++++.............. + *++++++++++++++++............ ... + *++++++++++++++++++................. + **+++++++++++++++++++................ + **+++++++++++++++++++++.............. + ***++++++++++++++++++++++............ + ***++++++++++++++++++++++++......... ...... ........... + ****++++++++++++++++++++++++++...... ......................... + *****++++++++++++++++++++++++++++............................... + *****++++++++++++++++++++++++++++++++............................ + ******+++++++++++++++++++++++++++++++++++........................... + *******+++++++++++++++++++++++++++++++++++++++....................... + ********+++++++++++++++++++++++++++++++++++++++++++.................. + Evaluated to 0.000000 + ready> mandel(-0.9, -1.4, 0.02, 0.03); + ******************************************************************************* + ******************************************************************************* + ******************************************************************************* + **********+++++++++++++++++++++************************************************ + *+++++++++++++++++++++++++++++++++++++++*************************************** + +++++++++++++++++++++++++++++++++++++++++++++********************************** + ++++++++++++++++++++++++++++++++++++++++++++++++++***************************** + ++++++++++++++++++++++++++++++++++++++++++++++++++++++************************* + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++********************** + +++++++++++++++++++++++++++++++++.........++++++++++++++++++******************* + +++++++++++++++++++++++++++++++.... 
......+++++++++++++++++++**************** + +++++++++++++++++++++++++++++....... ........+++++++++++++++++++************** + ++++++++++++++++++++++++++++........ ........++++++++++++++++++++************ + +++++++++++++++++++++++++++......... .. ...+++++++++++++++++++++********** + ++++++++++++++++++++++++++........... ....++++++++++++++++++++++******** + ++++++++++++++++++++++++............. .......++++++++++++++++++++++****** + +++++++++++++++++++++++............. ........+++++++++++++++++++++++**** + ++++++++++++++++++++++........... ..........++++++++++++++++++++++*** + ++++++++++++++++++++........... .........++++++++++++++++++++++* + ++++++++++++++++++............ ...........++++++++++++++++++++ + ++++++++++++++++............... .............++++++++++++++++++ + ++++++++++++++................. ...............++++++++++++++++ + ++++++++++++.................. .................++++++++++++++ + +++++++++.................. .................+++++++++++++ + ++++++........ . ......... ..++++++++++++ + ++............ ...... ....++++++++++ + .............. ...++++++++++ + .............. ....+++++++++ + .............. .....++++++++ + ............. ......++++++++ + ........... .......++++++++ + ......... ........+++++++ + ......... ........+++++++ + ......... ....+++++++ + ........ ...+++++++ + ....... ...+++++++ + ....+++++++ + .....+++++++ + ....+++++++ + ....+++++++ + ....+++++++ + Evaluated to 0.000000 + ready> ^D + +At this point, you may be starting to realize that Kaleidoscope is a +real and powerful language. It may not be self-similar :), but it can be +used to plot things that are! + +With this, we conclude the "adding user-defined operators" chapter of +the tutorial. We have successfully augmented our language, adding the +ability to extend the language in the library, and we have shown how +this can be used to build a simple but interesting end-user application +in Kaleidoscope. 
At this point, Kaleidoscope can build a variety of +applications that are functional and can call functions with +side-effects, but it can't actually define and mutate a variable itself. + +Strikingly, variable mutation is an important feature of some languages, +and it is not at all obvious how to `add support for mutable +variables <OCamlLangImpl7.html>`_ without having to add an "SSA +construction" phase to your front-end. In the next chapter, we will +describe how you can add variable mutation without building SSA in your +front-end. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +support for user-defined operators. To build this example, use: + +.. code-block:: bash + + # Compile + ocamlbuild toy.byte + # Run + ./toy.byte + +Here is the code: + +\_tags: + :: + + <{lexer,parser}.ml>: use_camlp4, pp(camlp4of) + <*.{byte,native}>: g++, use_llvm, use_llvm_analysis + <*.{byte,native}>: use_llvm_executionengine, use_llvm_target + <*.{byte,native}>: use_llvm_scalar_opts, use_bindings + +myocamlbuild.ml: + .. code-block:: ocaml + + open Ocamlbuild_plugin;; + + ocaml_lib ~extern:true "llvm";; + ocaml_lib ~extern:true "llvm_analysis";; + ocaml_lib ~extern:true "llvm_executionengine";; + ocaml_lib ~extern:true "llvm_target";; + ocaml_lib ~extern:true "llvm_scalar_opts";; + + flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);; + dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; + +token.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + + (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things.
*) + type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In + + (* operators *) + | Binary | Unary + +lexer.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + + let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + + and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + + and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + | id -> [< 'Token.Ident id; stream >] + + and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] + +ast.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + + (* expr - Base type for all expression nodes. *) + type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a unary operator. *) + | Unary of char * expr + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = + | Prototype of string * string array + | BinOpPrototype of string * string array * int + + (* func - This type represents a function definition itself. *) + type func = Function of proto * expr + +parser.ml: + .. 
code-block:: ocaml + + (*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + + (* binop_precedence - This holds the precedence for each binary operator that is + * defined *) + let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + + (* precedence - Get the precedence of the pending binary operator token. *) + let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr *) + let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? 
"expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + + (* unary + * ::= primary + * ::= '!' unary *) + and parse_unary = parser + (* If this is a unary operator, read it. *) + | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> + Ast.Unary (op, operand) + + (* If the current token is not an operator, it must be a primary expr. *) + | [< stream >] -> parse_primary stream + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the unary expression after the binary operator. *) + let rhs = parse_unary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. 
*) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream + + (* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? (id) *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator. *) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + + (* definition ::= 'def' prototype expression *) + let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + + (* toplevelexpr ::= expression *) + let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. 
*) + Ast.Function (Ast.Prototype ("", [||]), e) + + (* external ::= 'extern' prototype *) + let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e + +codegen.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + + open Llvm + + exception Error of string + + let context = global_context () + let the_module = create_module context "my cool jit" + let builder = builder context + let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 + let double_type = double_type context + + let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Unary (op, operand) -> + let operand = codegen_expr operand in + let callee = "unary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown unary operator") + in + build_call callee [|operand|] "unop" builder + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_add lhs_val rhs_val "addtmp" builder + | '-' -> build_sub lhs_val rhs_val "subtmp" builder + | '*' -> build_mul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> + (* If it wasn't a builtin binary operator, it must be a user defined + * one. Emit a call to it. 
*) + let callee = "binary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "binary operator not found!") + in + build_call callee [|lhs_val; rhs_val|] "binop" builder + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. 
*) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let preheader_bb = insertion_block builder in + let the_function = block_parent preheader_bb in + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Start the PHI node with an entry for start. *) + let variable = build_phi [(start_val, preheader_bb)] var_name builder in + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name variable; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. 
Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + let next_var = build_add variable step_val "nextvar" builder in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let loop_end_bb = insertion_block builder in + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Add a new entry to the PHI node for the backedge. *) + add_incoming (next_var, loop_end_bb) variable; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + + let codegen_proto = function + | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. 
*) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + + let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* If this is an operator, install it. *) + begin match proto with + | Ast.BinOpPrototype (name, args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add Parser.binop_precedence op prec; + | _ -> () + end; + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e + +toplevel.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + + (* top ::= definition | external | expression | ';' *) + let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. 
*) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream + +toy.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + open Llvm_target + open Llvm_scalar_opts + + let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. 
*) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module + ;; + + main () + +bindings.c + .. code-block:: c + + #include + + /* putchard - putchar that takes a double and returns 0. */ + extern double putchard(double X) { + putchar((char)X); + return 0; + } + + /* printd - printf that takes a double prints it as "%f\n", returning 0. */ + extern double printd(double X) { + printf("%f\n", X); + return 0; + } + +`Next: Extending the language: mutable variables / SSA +construction `_ + diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html deleted file mode 100644 index aa30555a1d40..000000000000 --- a/docs/tutorial/OCamlLangImpl7.html +++ /dev/null @@ -1,1904 +0,0 @@ - - - - - Kaleidoscope: Extending the Language: Mutable Variables / SSA - construction - - - - - - - - -

Kaleidoscope: Extending the Language: Mutable Variables

- - - -
-

- Written by Chris Lattner - and Erick Tryzelaar -

-
- - -

Chapter 7 Introduction

- - -
- -

Welcome to Chapter 7 of the "Implementing a language -with LLVM" tutorial. In chapters 1 through 6, we've built a very -respectable, albeit simple, functional -programming language. In our journey, we learned some parsing techniques, -how to build and represent an AST, how to build LLVM IR, and how to optimize -the resultant code as well as JIT compile it.

- -

While Kaleidoscope is interesting as a functional language, the fact that it -is functional makes it "too easy" to generate LLVM IR for it. In particular, a -functional language makes it very easy to build LLVM IR directly in SSA form. -Since LLVM requires that the input code be in SSA form, this is a very nice -property and it is often unclear to newcomers how to generate code for an -imperative language with mutable variables.

- -

The short (and happy) summary of this chapter is that there is no need for -your front-end to build SSA form: LLVM provides highly tuned and well tested -support for this, though the way it works is a bit unexpected for some.

- -
- - -

Why is this a hard problem?

- - -
- -

-To understand why mutable variables cause complexities in SSA construction, -consider this extremely simple C example: -

- -
-
-int G, H;
-int test(_Bool Condition) {
-  int X;
-  if (Condition)
-    X = G;
-  else
-    X = H;
-  return X;
-}
-
-
- -

In this case, we have the variable "X", whose value depends on the path -executed in the program. Because there are two different possible values for X -before the return instruction, a PHI node is inserted to merge the two values. -The LLVM IR that we want for this example looks like this:

- -
-
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.2
-}
-
-
- -

In this example, the loads from the G and H global variables are explicit in -the LLVM IR, and they live in the then/else branches of the if statement -(cond_true/cond_false). In order to merge the incoming values, the X.2 phi node -in the cond_next block selects the right value to use based on where control -flow is coming from: if control flow comes from the cond_false block, X.2 gets -the value of X.1. Alternatively, if control flow comes from cond_true, it gets -the value of X.0. The intent of this chapter is not to explain the details of -SSA form. For more information, see one of the many online -references.

- -

The question for this article is "who places the phi nodes when lowering -assignments to mutable variables?". The issue here is that LLVM -requires that its IR be in SSA form: there is no "non-ssa" mode for it. -However, SSA construction requires non-trivial algorithms and data structures, -so it is inconvenient and wasteful for every front-end to have to reproduce this -logic.

- -
- - -

Memory in LLVM

- - -
- -

The 'trick' here is that while LLVM does require all register values to be -in SSA form, it does not require (or permit) memory objects to be in SSA form. -In the example above, note that the loads from G and H are direct accesses to -G and H: they are not renamed or versioned. This differs from some other -compiler systems, which do try to version memory objects. In LLVM, instead of -encoding dataflow analysis of memory into the LLVM IR, it is handled with Analysis Passes which are computed on -demand.

- -

-With this in mind, the high-level idea is that we want to make a stack variable -(which lives in memory, because it is on the stack) for each mutable object in -a function. To take advantage of this trick, we need to talk about how LLVM -represents stack variables. -

- -

In LLVM, all memory accesses are explicit with load/store instructions, and -it is carefully designed not to have (or need) an "address-of" operator. Notice -how the type of the @G/@H global variables is actually "i32*" even though the -variable is defined as "i32". What this means is that @G defines space -for an i32 in the global data area, but its name actually refers to the -address for that space. Stack variables work the same way, except that instead of -being declared with global variable definitions, they are declared with the -LLVM alloca instruction:

- -
-
-define i32 @example() {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  ...
-  %tmp = load i32* %X       ; load the stack value %X from the stack.
-  %tmp2 = add i32 %tmp, 1   ; increment it
-  store i32 %tmp2, i32* %X  ; store it back
-  ...
-
-
- -

This code shows an example of how you can declare and manipulate a stack -variable in the LLVM IR. Stack memory allocated with the alloca instruction is -fully general: you can pass the address of the stack slot to functions, you can -store it in other variables, etc. In our example above, we could rewrite the -example to use the alloca technique to avoid using a PHI node:

- -
-
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-        store i32 %X.0, i32* %X   ; Update X
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-        store i32 %X.1, i32* %X   ; Update X
-  br label %cond_next
-
-cond_next:
-  %X.2 = load i32* %X       ; Read X
-  ret i32 %X.2
-}
-
-
- -

With this, we have discovered a way to handle arbitrary mutable variables -without the need to create Phi nodes at all:

- -
    -
  1. Each mutable variable becomes a stack allocation.
  2. -
  3. Each read of the variable becomes a load from the stack.
  4. -
  5. Each update of the variable becomes a store to the stack.
  6. -
  7. Taking the address of a variable just uses the stack address directly.
  8. -
- -

While this solution has solved our immediate problem, it introduced another -one: we have now apparently introduced a lot of stack traffic for very simple -and common operations, a major performance problem. Fortunately for us, the -LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles -this case, promoting allocas like this into SSA registers, inserting Phi nodes -as appropriate. If you run this example through the pass, for example, you'll -get:

- -
-
-$ llvm-as < example.ll | opt -mem2reg | llvm-dis
-@G = weak global i32 0
-@H = weak global i32 0
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.01
-}
-
-
- -

The mem2reg pass implements the standard "iterated dominance frontier" -algorithm for constructing SSA form and has a number of optimizations that speed -up (very common) degenerate cases. The mem2reg optimization pass is the answer -to dealing with mutable variables, and we highly recommend that you depend on -it. Note that mem2reg only works on variables in certain circumstances:

- -
    -
  1. mem2reg is alloca-driven: it looks for allocas and if it can handle them, it -promotes them. It does not apply to global variables or heap allocations.
  2. - -
  3. mem2reg only looks for alloca instructions in the entry block of the -function. Being in the entry block guarantees that the alloca is only executed -once, which makes analysis simpler.
  4. - -
  5. mem2reg only promotes allocas whose uses are direct loads and stores. If -the address of the stack object is passed to a function, or if any funny pointer -arithmetic is involved, the alloca will not be promoted.
  6. - -
  7. mem2reg only works on allocas of first class -values (such as pointers, scalars and vectors), and only if the array size -of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of -promoting structs or arrays to registers. Note that the "scalarrepl" pass is -more powerful and can promote structs, "unions", and arrays in many cases.
  8. - -
- -

-All of these properties are easy to satisfy for most imperative languages, and -we'll illustrate it below with Kaleidoscope. The final question you may be -asking is: should I bother with this nonsense for my front-end? Wouldn't it be -better if I just did SSA construction directly, avoiding use of the mem2reg -optimization pass? In short, we strongly recommend that you use this technique -for building SSA form, unless there is an extremely good reason not to. Using -this technique is:

- -
    -
  • Proven and well tested: llvm-gcc and clang both use this technique for local -mutable variables. As such, the most common clients of LLVM are using this to -handle a bulk of their variables. You can be sure that bugs are found fast and -fixed early.
  • - -
  • Extremely Fast: mem2reg has a number of special cases that make it fast in -common cases as well as fully general. For example, it has fast-paths for -variables that are only used in a single block, variables that only have one -assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc. -
  • - -
  • Needed for debug info generation: -Debug information in LLVM relies on having the address of the variable -exposed so that debug info can be attached to it. This technique dovetails -very naturally with this style of debug info.
  • -
- -

If nothing else, this makes it much easier to get your front-end up and -running, and is very simple to implement. Let's extend Kaleidoscope with mutable -variables now! -

- -
- - -

Mutable Variables in Kaleidoscope

- - -
- -

Now that we know the sort of problem we want to tackle, let's see what this -looks like in the context of our little Kaleidoscope language. We're going to -add two features:

- -
    -
  1. The ability to mutate variables with the '=' operator.
  2. -
  3. The ability to define new variables.
  4. -
- -

While the first item is really what this is about, we only have variables -for incoming arguments as well as for induction variables, and redefining those only -goes so far :). Also, the ability to define new variables is a -useful thing regardless of whether you will be mutating them. Here's a -motivating example that shows how we could use these:

- -
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-# Recursive fib, we could do this before.
-def fib(x)
-  if (x < 3) then
-    1
-  else
-    fib(x-1)+fib(x-2);
-
-# Iterative fib.
-def fibi(x)
-  var a = 1, b = 1, c in
-  (for i = 3, i < x in
-     c = a + b :
-     a = b :
-     b = c) :
-  b;
-
-# Call it.
-fibi(10);
-
-
- -

-In order to mutate variables, we have to change our existing variables to use -the "alloca trick". Once we have that, we'll add our new operator, then extend -Kaleidoscope to support new variable definitions. -

- -
- - -

Adjusting Existing Variables for Mutation

- - -
- -

-The symbol table in Kaleidoscope is managed at code generation time by the -'named_values' map. This map currently keeps track of the LLVM -"Value*" that holds the double value for the named variable. In order to -support mutation, we need to change this slightly, so that -named_values holds the memory location of the variable in -question. Note that this change is a refactoring: it changes the structure of -the code, but does not (by itself) change the behavior of the compiler. All of -these changes are isolated in the Kaleidoscope code generator.

- -

-At this point in Kaleidoscope's development, it only supports variables for two -things: incoming arguments to functions and the induction variable of 'for' -loops. For consistency, we'll allow mutation of these variables in addition to -other user-defined variables. This means that these will both need memory -locations. -

- -

To start our transformation of Kaleidoscope, we'll change the -named_values map so that it maps to AllocaInst* instead of Value*. -Once we do this, the C++ compiler will tell us what parts of the code we need to -update:

- -

Note: the OCaml bindings currently model both Value*s and -AllocaInst*s as Llvm.llvalues, but this may change in the -future to be more type safe.

- -
-
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-
-
- -

Also, since we will need to create these alloca's, we'll use a helper -function that ensures that the allocas are created in the entry block of the -function:

- -
-
-(* Create an alloca instruction in the entry block of the function. This
- * is used for mutable variables etc. *)
-let create_entry_block_alloca the_function var_name =
-  let builder = builder_at (instr_begin (entry_block the_function)) in
-  build_alloca double_type var_name builder
-
-
- -

This funny looking code creates an Llvm.llbuilder object that is -pointing at the first instruction of the entry block. It then creates an alloca -with the expected name and returns it. Because all values in Kaleidoscope are -doubles, there is no need to pass in a type to use.

- -

With this in place, the first functionality change we want to make is to -variable references. In our new scheme, variables live on the stack, so code -generating a reference to them actually needs to produce a load from the stack -slot:

- -
-
-let rec codegen_expr = function
-  ...
-  | Ast.Variable name ->
-      let v = try Hashtbl.find named_values name with
-        | Not_found -> raise (Error "unknown variable name")
-      in
-      (* Load the value. *)
-      build_load v name builder
-
-
- -

As you can see, this is pretty straightforward. Now we need to update the -things that define the variables to set up the alloca. We'll start with -codegen_expr Ast.For ... (see the full code listing -for the unabridged code):

- -
-
-  | Ast.For (var_name, start, end_, step, body) ->
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Create an alloca for the variable in the entry block. *)
-      let alloca = create_entry_block_alloca the_function var_name in
-
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      (* Store the value into the alloca. *)
-      ignore(build_store start_val alloca builder);
-
-      ...
-
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -> None
-      in
-      Hashtbl.add named_values var_name alloca;
-
-      ...
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Reload, increment, and restore the alloca. This handles the case where
-       * the body of the loop mutates the variable. *)
-      let cur_var = build_load alloca var_name builder in
-      let next_var = build_add cur_var step_val "nextvar" builder in
-      ignore(build_store next_var alloca builder);
-      ...
-
-
- -

This code is virtually identical to the code before we allowed mutable variables. -The big difference is that we no longer have to construct a PHI node, and we use -load/store to access the variable as needed.

- -

To support mutable argument variables, we need to also make allocas for them. -The code for this is also pretty simple:

- -
-
-(* Create an alloca for each argument and register the argument in the symbol
- * table so that references to it will succeed. *)
-let create_argument_allocas the_function proto =
-  let args = match proto with
-    | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
-  in
-  Array.iteri (fun i ai ->
-    let var_name = args.(i) in
-    (* Create an alloca for this variable. *)
-    let alloca = create_entry_block_alloca the_function var_name in
-
-    (* Store the initial value into the alloca. *)
-    ignore(build_store ai alloca builder);
-
-    (* Add arguments to variable symbol table. *)
-    Hashtbl.add named_values var_name alloca;
-  ) (params the_function)
-
-
- -

For each argument, we make an alloca, store the input value to the function -into the alloca, and register the alloca as the memory location for the -argument. This method gets invoked by Codegen.codegen_func right after -it sets up the entry block for the function.

- -

The final missing piece is adding the mem2reg pass, which allows us to get -good codegen once again:

- -
-
-let main () =
-  ...
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Promote allocas to registers. *)
-  add_memory_to_register_promotion the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combining the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-
- -

It is interesting to see what the code looks like before and after the -mem2reg optimization runs. For example, this is the before/after code for our -recursive fib function. Before the optimization:

- -
-
-define double @fib(double %x) {
-entry:
-  %x1 = alloca double
-  store double %x, double* %x1
-  %x2 = load double* %x1
-  %cmptmp = fcmp ult double %x2, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:    ; preds = %entry
-  br label %ifcont
-
-else:    ; preds = %entry
-  %x3 = load double* %x1
-  %subtmp = fsub double %x3, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %x4 = load double* %x1
-  %subtmp5 = fsub double %x4, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:    ; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-
-
- -

Here there is only one variable (x, the input argument) but you can still -see the extremely simple-minded code generation strategy we are using. In the -entry block, an alloca is created, and the initial input value is stored into -it. Each reference to the variable does a reload from the stack. Also, note -that we didn't modify the if/then/else expression, so it still inserts a PHI -node. While we could make an alloca for it, it is actually easier to create a -PHI node for it, so we still just make the PHI.

- -

Here is the code after the mem2reg pass runs:

- -
-
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double %x, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:
-  br label %ifcont
-
-else:
-  %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:    ; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-
-
- -

This is a trivial case for mem2reg, since there are no redefinitions of the -variable. The point of showing this is to calm your tension about inserting -such blatant inefficiencies :).

- -

After the rest of the optimizers run, we get:

- -
-
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double %x, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp ueq double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %else, label %ifcont
-
-else:
-  %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  ret double %addtmp
-
-ifcont:
-  ret double 1.000000e+00
-}
-
-
- -

Here we see that the simplifycfg pass decided to clone the return instruction -into the end of the 'else' block. This allowed it to eliminate some branches -and the PHI node.

- -

Now that all symbol table references are updated to use stack variables, -we'll add the assignment operator.

- -
- - -

New Assignment Operator

- - -
- -

With our current framework, adding a new assignment operator is really -simple. We will parse it just like any other binary operator, but handle it -internally (instead of allowing the user to define it). The first step is to -set a precedence:

- -
-
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '=' 2;
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  ...
-
-
- -

Now that the parser knows the precedence of the binary operator, it takes -care of all the parsing and AST generation. We just need to implement codegen -for the assignment operator. This looks like:

- -
-
-let rec codegen_expr = function
-      begin match op with
-      | '=' ->
-          (* Special case '=' because we don't want to emit the LHS as an
-           * expression. *)
-          let name =
-            match lhs with
-            | Ast.Variable name -> name
-            | _ -> raise (Error "destination of '=' must be a variable")
-          in
-
-
- -

Unlike the rest of the binary operators, our assignment operator doesn't -follow the "emit LHS, emit RHS, do computation" model. As such, it is handled -as a special case before the other binary operators are handled. The other -strange thing is that it requires the LHS to be a variable. It is invalid to -have "(x+1) = expr" - only things like "x = expr" are allowed. -

- - -
-
-          (* Codegen the rhs. *)
-          let val_ = codegen_expr rhs in
-
-          (* Lookup the name. *)
-          let variable = try Hashtbl.find named_values name with
-          | Not_found -> raise (Error "unknown variable name")
-          in
-          ignore(build_store val_ variable builder);
-          val_
-      | _ ->
-			...
-
-
- -

Once we have the variable, codegen'ing the assignment is straightforward: -we emit the RHS of the assignment, create a store, and return the computed -value. Returning a value allows for chained assignments like "X = (Y = Z)".

- -

Now that we have an assignment operator, we can mutate loop variables and -arguments. For example, we can now run code like this:

- -
-
-# Function to print a double.
-extern printd(x);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-def test(x)
-  printd(x) :
-  x = 4 :
-  printd(x);
-
-test(123);
-
-
- -

When run, this example prints "123" and then "4", showing that we did -actually mutate the value! Okay, we have now officially implemented our goal: -getting this to work requires SSA construction in the general case. However, -to be really useful, we want the ability to define our own local variables, so -let's add this next! -

- -
- - -

User-defined Local Variables

- - -
- -

Adding var/in is just like any of the other extensions we made to -Kaleidoscope: we extend the lexer, the parser, the AST and the code generator. -The first step for adding our new 'var/in' construct is to extend the lexer. -As before, this is pretty trivial; the code looks like this:

- -
-
-type token =
-  ...
-  (* var definition *)
-  | Var
-
-...
-
-and lex_ident buffer = parser
-      ...
-      | "in" -> [< 'Token.In; stream >]
-      | "binary" -> [< 'Token.Binary; stream >]
-      | "unary" -> [< 'Token.Unary; stream >]
-      | "var" -> [< 'Token.Var; stream >]
-      ...
-
-
- -

The next step is to define the AST node that we will construct. For var/in, -it looks like this:

- -
-
-type expr =
-  ...
-  (* variant for var/in. *)
-  | Var of (string * expr option) array * expr
-  ...
-
-
- -

var/in allows a list of names to be defined all at once, and each name can -optionally have an initializer value. As such, we capture this information in -the array of (name, optional initializer) pairs carried by the Var variant. -Also, var/in has a body; this body is allowed to access -the variables defined by the var/in.

- -

With this in place, we can define the parser pieces. The first thing we do -is add it as a primary expression:

- -
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr
- *   ::= varexpr *)
-let rec parse_primary = parser
-  ...
-  (* varexpr
-   *   ::= 'var' identifier ('=' expression)?
-   *             (',' identifier ('=' expression)?)* 'in' expression *)
-  | [< 'Token.Var;
-       (* At least one variable name is required. *)
-       'Token.Ident id ?? "expected identifier after var";
-       init=parse_var_init;
-       var_names=parse_var_names [(id, init)];
-       (* At this point, we have to have 'in'. *)
-       'Token.In ?? "expected 'in' keyword after 'var'";
-       body=parse_expr >] ->
-      Ast.Var (Array.of_list (List.rev var_names), body)
-
-...
-
-and parse_var_init = parser
-  (* read in the optional initializer. *)
-  | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
-  | [< >] -> None
-
-and parse_var_names accumulator = parser
-  | [< 'Token.Kwd ',';
-       'Token.Ident id ?? "expected identifier list after var";
-       init=parse_var_init;
-       e=parse_var_names ((id, init) :: accumulator) >] -> e
-  | [< >] -> accumulator
-
-
- -

Now that we can parse and represent the code, we need to support emission of -LLVM IR for it. This code starts out with:

- -
-
-let rec codegen_expr = function
-  ...
-  | Ast.Var (var_names, body) ->
-      let old_bindings = ref [] in
-
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Register all variables and emit their initializer. *)
-      Array.iter (fun (var_name, init) ->
-
-
- -

Basically it loops over all the variables, installing them one at a time. -For each variable we put into the symbol table, we remember the previous value -that we replace in old_bindings.

- -
-
-        (* Emit the initializer before adding the variable to scope, this
-         * prevents the initializer from referencing the variable itself, and
-         * permits stuff like this:
-         *   var a = 1 in
-         *     var a = a in ...   # refers to outer 'a'. *)
-        let init_val =
-          match init with
-          | Some init -> codegen_expr init
-          (* If not specified, use 0.0. *)
-          | None -> const_float double_type 0.0
-        in
-
-        let alloca = create_entry_block_alloca the_function var_name in
-        ignore(build_store init_val alloca builder);
-
-        (* Remember the old variable binding so that we can restore the binding
-         * when we unrecurse. *)
-
-        begin
-          try
-            let old_value = Hashtbl.find named_values var_name in
-            old_bindings := (var_name, old_value) :: !old_bindings;
-          with Not_found -> ()
-        end;
-
-        (* Remember this binding. *)
-        Hashtbl.add named_values var_name alloca;
-      ) var_names;
-
-
- -

There are more comments here than code. The basic idea is that we emit the -initializer, create the alloca, then update the symbol table to point to it. -Once all the variables are installed in the symbol table, we evaluate the body -of the var/in expression:

- -
-
-      (* Codegen the body, now that all vars are in scope. *)
-      let body_val = codegen_expr body in
-
-
- -

Finally, before returning, we restore the previous variable bindings:

- -
-
-      (* Pop all our variables from scope. *)
-      List.iter (fun (var_name, old_value) ->
-        Hashtbl.add named_values var_name old_value
-      ) !old_bindings;
-
-      (* Return the body computation. *)
-      body_val
-
-
- -

The end result of all of this is that we get properly scoped variable -definitions, and we even (trivially) allow mutation of them :).

- -

With this, we completed what we set out to do. Our nice iterative fib -example from the intro compiles and runs just fine. The mem2reg pass optimizes -all of our stack variables into SSA registers, inserting PHI nodes where needed, -and our front-end remains simple: no "iterated dominance frontier" computation -anywhere in sight.

- -
- - -

Full Code Listing

- - -
- -

-Here is the complete code listing for our running example, enhanced with mutable -variables and var/in support. To build this example, use: -

- -
-
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-
-
- -

Here is the code:

- -
-
_tags:
-
-
-<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
-<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
-<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
-<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
-
-
- -
myocamlbuild.ml:
-
-
-open Ocamlbuild_plugin;;
-
-(* Declare each LLVM OCaml binding library as an external library.
- * NOTE(review): presumably this is what makes the use_llvm* tags listed in
- * _tags usable - confirm against the ocamlbuild plugin documentation. *)
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-(* For link steps of OCaml targets tagged "g++", pass
- * "-cc g++ -cclib -rdynamic" on the command line. *)
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
-(* Link steps of OCaml targets tagged "use_bindings" depend on bindings.o. *)
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-
-
- -
token.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* token - The tokens produced by the lexer. Reserved words and literals get a
- * dedicated constructor; any other single character (operators, parentheses,
- * commas, ...) is returned as 'Kwd' carrying that character. *)
-type token =
-  (* commands: the "def" and "extern" keywords *)
-  | Def | Extern
-
-  (* primary: an identifier with its name, or a numeric literal *)
-  | Ident of string | Number of float
-
-  (* unknown: any character the lexer has no dedicated token for *)
-  | Kwd of char
-
-  (* control: "if"/"then"/"else" and "for"/"in" ("in" is also used by var/in) *)
-  | If | Then | Else
-  | For | In
-
-  (* operators: the "binary" and "unary" keywords of operator definitions *)
-  | Binary | Unary
-
-  (* var definition: the "var" keyword *)
-  | Var
-
-
- -
lexer.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-(* lex - Turn a character stream into a lazy stream of Token.token values.
- * The four mutually recursive stream parsers below each consume a prefix of
- * the character stream and hand the remainder back to 'lex'. *)
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9][0-9.]* - must start with a digit. *)
-  | [< ' ('0' .. '9' as c); stream >] ->
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [< ' ('#'); stream >] ->
-      lex_comment stream
-
-  (* Otherwise, just return the character itself, wrapped in Token.Kwd. *)
-  | [< 'c; stream >] ->
-      [< 'Token.Kwd c; lex stream >]
-
-  (* end of stream. *)
-  | [< >] -> [< >]
-
-(* lex_number - Accumulate the remaining digits and dots of a numeric literal
- * in 'buffer', then emit it as a Token.Number.
- * NOTE(review): any run of digits and dots is accepted here, so text such as
- * "1.2.3" reaches float_of_string unchanged; presumably that raises - confirm. *)
-and lex_number buffer = parser
-  | [< ' ('0' .. '9' | '.' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [< stream=lex >] ->
-      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
-
-(* lex_ident - Accumulate the remaining alphanumeric characters of a word in
- * 'buffer', then emit either the matching keyword token or a Token.Ident. *)
-and lex_ident buffer = parser
-  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [< stream=lex >] ->
-      match Buffer.contents buffer with
-      | "def" -> [< 'Token.Def; stream >]
-      | "extern" -> [< 'Token.Extern; stream >]
-      | "if" -> [< 'Token.If; stream >]
-      | "then" -> [< 'Token.Then; stream >]
-      | "else" -> [< 'Token.Else; stream >]
-      | "for" -> [< 'Token.For; stream >]
-      | "in" -> [< 'Token.In; stream >]
-      | "binary" -> [< 'Token.Binary; stream >]
-      | "unary" -> [< 'Token.Unary; stream >]
-      | "var" -> [< 'Token.Var; stream >]
-      | id -> [< 'Token.Ident id; stream >]
-
-(* lex_comment - Discard characters up to and including the next newline, then
- * resume normal lexing; a comment that runs to end of input yields an empty
- * token stream. *)
-and lex_comment = parser
-  | [< ' ('\n'); stream=lex >] -> stream
-  | [< 'c; e=lex_comment >] -> e
-  | [< >] -> [< >]
-
-
- -
ast.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. Every constructor below is built
- * by the parser and consumed by the code generator. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a unary operator: operator character and its operand. *)
-  | Unary of char * expr
-
-  (* variant for a binary operator: operator character, lhs, rhs. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls: callee name and argument expressions. *)
-  | Call of string * expr array
-
-  (* variant for if/then/else: condition, 'then' value, 'else' value. *)
-  | If of expr * expr * expr
-
-  (* variant for for/in: loop variable name, start value, end condition,
-   * optional step, and body. *)
-  | For of string * expr * expr * expr option * expr
-
-  (* variant for var/in: the (name, optional initializer) pairs being
-   * introduced, and the body in which they are visible. *)
-  | Var of (string * expr option) array * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). BinOpPrototype additionally carries the precedence of a
- * user-defined binary operator; unary operators use the plain Prototype. *)
-type proto =
-  | Prototype of string * string array
-  | BinOpPrototype of string * string array * int
-
-(* func - This type represents a function definition itself: its prototype
- * together with the single expression that forms its body. *)
-type func = Function of proto * expr
-
-
- -
parser.ml:
-
-
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - Table mapping every currently defined binary operator
- * character to its precedence. *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Precedence registered for the binary operator [c], or -1 when
- * [c] is not a known binary operator. *)
-let precedence c =
-  if Hashtbl.mem binop_precedence c
-  then Hashtbl.find binop_precedence c
-  else -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr
- *   ::= varexpr
- *
- * Parses one primary expression from the token stream and returns its Ast.expr.
- * Raises Stream.Error when the next token cannot start an expression or when a
- * required token inside one of the forms is missing. *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [< 'Token.Number n >] -> Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [< 'Token.Ident id; stream >] ->
-      (* Collects the comma-separated call arguments in reverse order. *)
-      let rec parse_args accumulator = parser
-        | [< e=parse_expr; stream >] ->
-            begin parser
-              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
-              | [< >] -> e :: accumulator
-            end stream
-        | [< >] -> accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [< 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'">] ->
-            (* parse_args accumulated in reverse, so flip before building. *)
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [< >] -> Ast.Variable id
-      in
-      parse_ident id stream
-
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [< 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
-      Ast.If (c, t, e)
-
-  (* forexpr
-   *   ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [< 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream >] ->
-      begin parser
-        | [<
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream >] ->
-            (* The step expression is optional. *)
-            let step =
-              begin parser
-              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
-              | [< >] -> None
-              end stream
-            in
-            begin parser
-            | [< 'Token.In; body=parse_expr >] ->
-                Ast.For (id, start, end_, step, body)
-            | [< >] ->
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [< >] ->
-            raise (Stream.Error "expected '=' after for")
-      end stream
-
-  (* varexpr
-   *   ::= 'var' identifier ('=' expression)?
-   *             (',' identifier ('=' expression)?)* 'in' expression *)
-  | [< 'Token.Var;
-       (* At least one variable name is required. *)
-       'Token.Ident id ?? "expected identifier after var";
-       init=parse_var_init;
-       var_names=parse_var_names [(id, init)];
-       (* At this point, we have to have 'in'. *)
-       'Token.In ?? "expected 'in' keyword after 'var'";
-       body=parse_expr >] ->
-      (* parse_var_names accumulates in reverse; restore source order. *)
-      Ast.Var (Array.of_list (List.rev var_names), body)
-
-  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
-
-(* unary
- *   ::= primary
- *   ::= '!' unary
- *
- * Parses a (possibly nested) unary-operator application, or falls through to a
- * primary expression. Any Kwd character other than '(' and ')' found in
- * operand position is treated as a unary operator. *)
-and parse_unary = parser
-  (* If this is a unary operator, read it. The operand is itself a 'unary', as
-   * the grammar above states, so the operator binds tighter than any binary
-   * operator: "-a + b" is "(-a) + b", not "-(a + b)". *)
-  | [< 'Token.Kwd op when op <> '(' && op <> ')'; operand=parse_unary >] ->
-      Ast.Unary (op, operand)
-
-  (* If the current token is not an operator, it must be a primary expr. *)
-  | [< stream >] -> parse_primary stream
-
-(* binoprhs
- *   ::= (binop unary)*
- *
- * Operator-precedence parsing: given the already parsed [lhs], keep folding in
- * "binop unary" pairs while the pending operator's precedence is at least
- * [expr_prec]; return the resulting expression. *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  | Some (Token.Kwd op) when Hashtbl.mem binop_precedence op ->
-      let op_prec = precedence op in
-      if op_prec >= expr_prec then begin
-        (* The operator binds tightly enough: consume it. *)
-        Stream.junk stream;
-
-        (* The operand that follows the operator. *)
-        let operand = parse_unary stream in
-
-        (* If the operator after the operand binds tighter than this one, it
-         * gets the operand as its own lhs first. *)
-        let operand =
-          match Stream.peek stream with
-          | Some (Token.Kwd next_op) when op_prec < precedence next_op ->
-              parse_bin_rhs (op_prec + 1) operand stream
-          | _ -> operand
-        in
-
-        (* Fold into a new lhs and continue with the remaining operators. *)
-        parse_bin_rhs expr_prec (Ast.Binary (op, lhs, operand)) stream
-      end else
-        lhs
-  | _ -> lhs
-
-(* varinit ::= ('=' expression)?
- * Returns Some initializer when an '=' follows the variable name, else None. *)
-and parse_var_init = parser
-  (* read in the optional initializer. *)
-  | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
-  | [< >] -> None
-
-(* varnames ::= (',' identifier ('=' expression)?)*
- * Prepends each further (name, optional initializer) pair to [accumulator],
- * so the returned list is in reverse source order. *)
-and parse_var_names accumulator = parser
-  | [< 'Token.Kwd ',';
-       'Token.Ident id ?? "expected identifier list after var";
-       init=parse_var_init;
-       e=parse_var_names ((id, init) :: accumulator) >] -> e
-  | [< >] -> accumulator
-
-(* expression
- *   ::= unary binoprhs
- *
- * Parses a leading unary expression, then folds in any binary operators that
- * follow, starting from the minimum precedence 0. *)
-and parse_expr = parser
-  | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')'
- *   ::= binary LETTER number? '(' id id ')'
- *   ::= unary LETTER number? '(' id ')'
- *
- * Argument names are consecutive identifiers; parse_args below does not
- * accept commas between them. Returns an Ast.proto; raises Stream.Error when
- * a required token is missing or an operator has the wrong operand count. *)
-let parse_prototype =
-  (* Collects argument names in reverse order. *)
-  let rec parse_args accumulator = parser
-    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
-    | [< >] -> accumulator
-  in
-  (* Yields the name prefix and the required operand count of the operator. *)
-  let parse_operator = parser
-    | [< 'Token.Unary >] -> "unary", 1
-    | [< 'Token.Binary >] -> "binary", 2
-  in
-  (* Optional precedence; 30 when none is written. *)
-  let parse_binary_precedence = parser
-    | [< 'Token.Number n >] -> int_of_float n
-    | [< >] -> 30
-  in
-  parser
-  | [< 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  | [< (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
-      (* The function name is the prefix plus the operator character, e.g.
-       * "binary:" or "unary!". *)
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        (* Only binary operators carry a precedence. *)
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)
-  | [< >] ->
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression
- * Returns the Ast.Function pairing the parsed prototype with its body. *)
-let parse_definition = parser
-  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression
- * Wraps a bare expression in a function with an empty name and no
- * arguments. *)
-let parse_toplevel = parser
-  | [< e=parse_expr >] ->
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype
- * Returns the parsed prototype; an extern has no body. *)
-let parse_extern = parser
-  | [< 'Token.Extern; e=parse_prototype >] -> e
-
-
- -
codegen.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-(* Raised by the code generator with a human-readable message. *)
-exception Error of string
-
-(* Shared code-generation state. *)
-let context = global_context ()
-(* The single module that all generated functions are added to. *)
-let the_module = create_module context "my cool jit"
-(* The instruction builder used for all emission; this binding shadows the
- * 'builder' constructor function from Llvm. *)
-let builder = builder context
-(* Symbol table: variable name -> LLVM value. Expression codegen loads from
- * and stores to these values, i.e. it treats them as stack slots. *)
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-(* The 'double' type; this binding shadows Llvm.double_type. *)
-let double_type = double_type context
-
-(* create_entry_block_alloca - Emit an alloca of a double named [var_name] at
- * the very beginning of [the_function]'s entry block and return it. Used for
- * mutable variables; a temporary builder is used so the position of the
- * global builder is left untouched. *)
-let create_entry_block_alloca the_function var_name =
-  let entry_start = instr_begin (entry_block the_function) in
-  build_alloca double_type var_name (builder_at context entry_start)
-
-(* codegen_expr - Emit LLVM IR for an expression at the current position of
- * 'builder' and return the llvalue holding its result (always a double).
- *
- * Variables live in stack slots recorded in 'named_values': a reference emits
- * a load, an assignment emits a store. Scoped constructs (for, var/in) install
- * their slots in 'named_values' and put back the previous state when done.
- *
- * Raises Error on unknown variables, operators or functions, on a call with
- * the wrong number of arguments, and when the destination of '=' is not a
- * variable. *)
-let rec codegen_expr = function
-  | Ast.Number n -> const_float double_type n
-  | Ast.Variable name ->
-      let v = try Hashtbl.find named_values name with
-        | Not_found -> raise (Error "unknown variable name")
-      in
-      (* Load the value. *)
-      build_load v name builder
-  | Ast.Unary (op, operand) ->
-      let operand = codegen_expr operand in
-      (* Unary operators are ordinary functions named "unary<op>". *)
-      let callee = "unary" ^ (String.make 1 op) in
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown unary operator")
-      in
-      build_call callee [|operand|] "unop" builder
-  | Ast.Binary (op, lhs, rhs) ->
-      begin match op with
-      | '=' ->
-          (* Special case '=' because we don't want to emit the LHS as an
-           * expression. *)
-          let name =
-            match lhs with
-            | Ast.Variable name -> name
-            | _ -> raise (Error "destination of '=' must be a variable")
-          in
-
-          (* Codegen the rhs. *)
-          let val_ = codegen_expr rhs in
-
-          (* Lookup the name. *)
-          let variable = try Hashtbl.find named_values name with
-          | Not_found -> raise (Error "unknown variable name")
-          in
-          ignore(build_store val_ variable builder);
-          (* The assigned value is the result, which allows "x = (y = z)". *)
-          val_
-      | _ ->
-          let lhs_val = codegen_expr lhs in
-          let rhs_val = codegen_expr rhs in
-          begin
-            match op with
-            (* All values are doubles, so the floating-point builders are
-             * required; the integer add/sub/mul instructions are not valid
-             * on double operands. *)
-            | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
-            | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
-            | '*' -> build_fmul lhs_val rhs_val "multmp" builder
-            | '<' ->
-                (* Convert bool 0/1 to double 0.0 or 1.0 *)
-                let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-                build_uitofp i double_type "booltmp" builder
-            | _ ->
-                (* If it wasn't a builtin binary operator, it must be a user defined
-                 * one. Emit a call to it. *)
-                let callee = "binary" ^ (String.make 1 op) in
-                let callee =
-                  match lookup_function callee the_module with
-                  | Some callee -> callee
-                  | None -> raise (Error "binary operator not found!")
-                in
-                build_call callee [|lhs_val; rhs_val|] "binop" builder
-          end
-      end
-  | Ast.Call (callee, args) ->
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -> callee
-        | None -> raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params <> Array.length args then
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-  | Ast.If (cond, then_, else_) ->
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing non-equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-
-      (* Set a unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-  | Ast.For (var_name, start, end_, step, body) ->
-      (* Output this as:
-       *   var = alloca double
-       *   ...
-       *   start = startexpr
-       *   store start -> var
-       *   goto loop
-       * loop:
-       *   ...
-       *   bodyexpr
-       *   ...
-       * loopend:
-       *   step = stepexpr
-       *   endcond = endexpr
-       *
-       *   curvar = load var
-       *   nextvar = curvar + step
-       *   store nextvar -> var
-       *   br endcond, loop, endloop
-       * outloop: *)
-
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Create an alloca for the variable in the entry block. *)
-      let alloca = create_entry_block_alloca the_function var_name in
-
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      (* Store the value into the alloca. *)
-      ignore(build_store start_val alloca builder);
-
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Within the loop, the variable refers to the alloca. If it shadows an
-       * existing variable, we have to restore it, so save it now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -> None
-      in
-      Hashtbl.replace named_values var_name alloca;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -> codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -> const_float double_type 1.0
-      in
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Reload, increment, and restore the alloca. This handles the case where
-       * the body of the loop mutates the variable. The increment is a
-       * floating-point add because the variable is a double. *)
-      let cur_var = build_load alloca var_name builder in
-      let next_var = build_fadd cur_var step_val "nextvar" builder in
-      ignore(build_store next_var alloca builder);
-
-      (* Convert condition to a bool by comparing non-equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
-      (* Create the "after loop" block and insert it. *)
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-
-      (* Leave the loop scope: put back the shadowed variable, or unbind the
-       * loop variable entirely if nothing was shadowed, so that it is not
-       * visible after the loop. *)
-      begin match old_val with
-      | Some old_val -> Hashtbl.replace named_values var_name old_val
-      | None -> Hashtbl.remove named_values var_name
-      end;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-  | Ast.Var (var_names, body) ->
-      (* For every variable installed below: its name and the binding it
-       * replaced (None if the name was unbound). Most recent first, so that
-       * restoring in list order undoes the installs in reverse. *)
-      let old_bindings = ref [] in
-
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Register all variables and emit their initializer. *)
-      Array.iter (fun (var_name, init) ->
-        (* Emit the initializer before adding the variable to scope, this
-         * prevents the initializer from referencing the variable itself, and
-         * permits stuff like this:
-         *   var a = 1 in
-         *     var a = a in ...   # refers to outer 'a'. *)
-        let init_val =
-          match init with
-          | Some init -> codegen_expr init
-          (* If not specified, use 0.0. *)
-          | None -> const_float double_type 0.0
-        in
-
-        let alloca = create_entry_block_alloca the_function var_name in
-        ignore(build_store init_val alloca builder);
-
-        (* Remember the old variable binding so that we can restore the binding
-         * when we unrecurse. *)
-        let old_value =
-          try Some (Hashtbl.find named_values var_name) with Not_found -> None
-        in
-        old_bindings := (var_name, old_value) :: !old_bindings;
-
-        (* Remember this binding. *)
-        Hashtbl.replace named_values var_name alloca;
-      ) var_names;
-
-      (* Codegen the body, now that all vars are in scope. *)
-      let body_val = codegen_expr body in
-
-      (* Pop all our variables from scope: restore what each one shadowed, or
-       * unbind it if it shadowed nothing, so it does not outlive the var/in
-       * expression. *)
-      List.iter (fun (var_name, old_value) ->
-        match old_value with
-        | Some old_value -> Hashtbl.replace named_values var_name old_value
-        | None -> Hashtbl.remove named_values var_name
-      ) !old_bindings;
-
-      (* Return the body computation. *)
-      body_val
-
-(* codegen_proto - Declare (or look up) the LLVM function described by a
- * prototype, name its parameters after the prototype's argument names, record
- * each parameter in 'named_values', and return the function.
- * Raises Error when a function of that name already has a body or was
- * declared with a different type. *)
-let codegen_proto = function
-  | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
-      (* Every function has type double(double, ..., double). *)
-      let arg_types = Array.make (Array.length args) double_type in
-      let fn_type = function_type double_type arg_types in
-      let fn =
-        match lookup_function name the_module with
-        | Some existing ->
-            (* Something called 'name' already exists: it must still be a
-             * bodiless declaration ... *)
-            if block_begin existing <> At_end existing then
-              raise (Error "redefinition of function");
-
-            (* ... and must have exactly the type we are about to use. *)
-            if element_type (type_of existing) <> fn_type then
-              raise (Error "redefinition of function with different # args");
-            existing
-        | None -> declare_function name fn_type the_module
-      in
-
-      (* Name each parameter and enter it in the symbol table. *)
-      let formals = params fn in
-      for i = 0 to Array.length formals - 1 do
-        let arg_name = args.(i) in
-        set_value_name arg_name formals.(i);
-        Hashtbl.add named_values arg_name formals.(i)
-      done;
-      fn
-
-(* Create an alloca for each argument and register the argument in the symbol
- * table so that references to it will succeed. *)
-let create_argument_allocas the_function proto =
-  let args = match proto with
-    | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
-  in
-  Array.iteri (fun i ai ->
-    let var_name = args.(i) in
-    (* Create an alloca for this variable. *)
-    let alloca = create_entry_block_alloca the_function var_name in
-
-    (* Store the initial value into the alloca. *)
-    ignore(build_store ai alloca builder);
-
-    (* Add arguments to variable symbol table. *)
-    Hashtbl.add named_values var_name alloca;
-  ) (params the_function)
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) ->
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* If this is an operator, install it. *)
-      begin match proto with
-      | Ast.BinOpPrototype (name, args, prec) ->
-          let op = name.[String.length name - 1] in
-          Hashtbl.add Parser.binop_precedence op prec;
-      | _ -> ()
-      end;
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        (* Add all arguments to the symbol table and create their allocas. *)
-        create_argument_allocas the_function proto;
-
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e ->
-        delete_function the_function;
-        raise e
-
-
- -
toplevel.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -> ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') ->
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token ->
-      begin
-        try match token with
-        | Token.Def ->
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern ->
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ ->
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s ->
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready> "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-
-
- -
toy.ml:
-
-
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '=' 2;
-  Hashtbl.add Parser.binop_precedence '<' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready> "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Promote allocas to registers. *)
-  add_memory_to_register_promotion the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-
-
- -
bindings.c
-
-
-#include <stdio.h>
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-/* printd - printf that takes a double prints it as "%f\n", returning 0. */
-extern double printd(double X) {
-  printf("%f\n", X);
-  return 0;
-}
-
-
-
- -Next: Conclusion and other useful LLVM tidbits -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Erick Tryzelaar
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $ -
- - diff --git a/docs/tutorial/OCamlLangImpl7.rst b/docs/tutorial/OCamlLangImpl7.rst new file mode 100644 index 000000000000..cfb49312c50f --- /dev/null +++ b/docs/tutorial/OCamlLangImpl7.rst @@ -0,0 +1,1723 @@ +======================================================= +Kaleidoscope: Extending the Language: Mutable Variables +======================================================= + +.. contents:: + :local: + +Chapter 7 Introduction +====================== + +Welcome to Chapter 7 of the "`Implementing a language with +LLVM `_" tutorial. In chapters 1 through 6, we've built a +very respectable, albeit simple, `functional programming +language `_. In our +journey, we learned some parsing techniques, how to build and represent +an AST, how to build LLVM IR, and how to optimize the resultant code as +well as JIT compile it. + +While Kaleidoscope is interesting as a functional language, the fact +that it is functional makes it "too easy" to generate LLVM IR for it. In +particular, a functional language makes it very easy to build LLVM IR +directly in `SSA +form `_. +Since LLVM requires that the input code be in SSA form, this is a very +nice property and it is often unclear to newcomers how to generate code +for an imperative language with mutable variables. + +The short (and happy) summary of this chapter is that there is no need +for your front-end to build SSA form: LLVM provides highly tuned and +well tested support for this, though the way it works is a bit +unexpected for some. + +Why is this a hard problem? +=========================== + +To understand why mutable variables cause complexities in SSA +construction, consider this extremely simple C example: + +.. code-block:: c + + int G, H; + int test(_Bool Condition) { + int X; + if (Condition) + X = G; + else + X = H; + return X; + } + +In this case, we have the variable "X", whose value depends on the path +executed in the program. 
Because there are two different possible values +for X before the return instruction, a PHI node is inserted to merge the +two values. The LLVM IR that we want for this example looks like this: + +.. code-block:: llvm + + @G = weak global i32 0 ; type of @G is i32* + @H = weak global i32 0 ; type of @H is i32* + + define i32 @test(i1 %Condition) { + entry: + br i1 %Condition, label %cond_true, label %cond_false + + cond_true: + %X.0 = load i32* @G + br label %cond_next + + cond_false: + %X.1 = load i32* @H + br label %cond_next + + cond_next: + %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ] + ret i32 %X.2 + } + +In this example, the loads from the G and H global variables are +explicit in the LLVM IR, and they live in the then/else branches of the +if statement (cond\_true/cond\_false). In order to merge the incoming +values, the X.2 phi node in the cond\_next block selects the right value +to use based on where control flow is coming from: if control flow comes +from the cond\_false block, X.2 gets the value of X.1. Alternatively, if +control flow comes from cond\_true, it gets the value of X.0. The intent +of this chapter is not to explain the details of SSA form. For more +information, see one of the many `online +references `_. + +The question for this article is "who places the phi nodes when lowering +assignments to mutable variables?". The issue here is that LLVM +*requires* that its IR be in SSA form: there is no "non-ssa" mode for +it. However, SSA construction requires non-trivial algorithms and data +structures, so it is inconvenient and wasteful for every front-end to +have to reproduce this logic. + +Memory in LLVM +============== + +The 'trick' here is that while LLVM does require all register values to +be in SSA form, it does not require (or permit) memory objects to be in +SSA form. In the example above, note that the loads from G and H are +direct accesses to G and H: they are not renamed or versioned. 
This +differs from some other compiler systems, which do try to version memory +objects. In LLVM, instead of encoding dataflow analysis of memory into +the LLVM IR, it is handled with `Analysis +Passes <../WritingAnLLVMPass.html>`_ which are computed on demand. + +With this in mind, the high-level idea is that we want to make a stack +variable (which lives in memory, because it is on the stack) for each +mutable object in a function. To take advantage of this trick, we need +to talk about how LLVM represents stack variables. + +In LLVM, all memory accesses are explicit with load/store instructions, +and it is carefully designed not to have (or need) an "address-of" +operator. Notice how the type of the @G/@H global variables is actually +"i32\*" even though the variable is defined as "i32". What this means is +that @G defines *space* for an i32 in the global data area, but its +*name* actually refers to the address for that space. Stack variables +work the same way, except that instead of being declared with global +variable definitions, they are declared with the `LLVM alloca +instruction <../LangRef.html#i_alloca>`_: + +.. code-block:: llvm + + define i32 @example() { + entry: + %X = alloca i32 ; type of %X is i32*. + ... + %tmp = load i32* %X ; load the stack value %X from the stack. + %tmp2 = add i32 %tmp, 1 ; increment it + store i32 %tmp2, i32* %X ; store it back + ... + +This code shows an example of how you can declare and manipulate a stack +variable in the LLVM IR. Stack memory allocated with the alloca +instruction is fully general: you can pass the address of the stack slot +to functions, you can store it in other variables, etc. In our example +above, we could rewrite the example to use the alloca technique to avoid +using a PHI node: + +.. code-block:: llvm + + @G = weak global i32 0 ; type of @G is i32* + @H = weak global i32 0 ; type of @H is i32* + + define i32 @test(i1 %Condition) { + entry: + %X = alloca i32 ; type of %X is i32*. 
+ br i1 %Condition, label %cond_true, label %cond_false + + cond_true: + %X.0 = load i32* @G + store i32 %X.0, i32* %X ; Update X + br label %cond_next + + cond_false: + %X.1 = load i32* @H + store i32 %X.1, i32* %X ; Update X + br label %cond_next + + cond_next: + %X.2 = load i32* %X ; Read X + ret i32 %X.2 + } + +With this, we have discovered a way to handle arbitrary mutable +variables without the need to create Phi nodes at all: + +#. Each mutable variable becomes a stack allocation. +#. Each read of the variable becomes a load from the stack. +#. Each update of the variable becomes a store to the stack. +#. Taking the address of a variable just uses the stack address + directly. + +While this solution has solved our immediate problem, it introduced +another one: we have now apparently introduced a lot of stack traffic +for very simple and common operations, a major performance problem. +Fortunately for us, the LLVM optimizer has a highly-tuned optimization +pass named "mem2reg" that handles this case, promoting allocas like this +into SSA registers, inserting Phi nodes as appropriate. If you run this +example through the pass, for example, you'll get: + +.. code-block:: bash + + $ llvm-as < example.ll | opt -mem2reg | llvm-dis + @G = weak global i32 0 + @H = weak global i32 0 + + define i32 @test(i1 %Condition) { + entry: + br i1 %Condition, label %cond_true, label %cond_false + + cond_true: + %X.0 = load i32* @G + br label %cond_next + + cond_false: + %X.1 = load i32* @H + br label %cond_next + + cond_next: + %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ] + ret i32 %X.01 + } + +The mem2reg pass implements the standard "iterated dominance frontier" +algorithm for constructing SSA form and has a number of optimizations +that speed up (very common) degenerate cases. The mem2reg optimization +pass is the answer to dealing with mutable variables, and we highly +recommend that you depend on it. 
Note that mem2reg only works on +variables in certain circumstances: + +#. mem2reg is alloca-driven: it looks for allocas and if it can handle + them, it promotes them. It does not apply to global variables or heap + allocations. +#. mem2reg only looks for alloca instructions in the entry block of the + function. Being in the entry block guarantees that the alloca is only + executed once, which makes analysis simpler. +#. mem2reg only promotes allocas whose uses are direct loads and stores. + If the address of the stack object is passed to a function, or if any + funny pointer arithmetic is involved, the alloca will not be + promoted. +#. mem2reg only works on allocas of `first + class <../LangRef.html#t_classifications>`_ values (such as pointers, + scalars and vectors), and only if the array size of the allocation is + 1 (or missing in the .ll file). mem2reg is not capable of promoting + structs or arrays to registers. Note that the "scalarrepl" pass is + more powerful and can promote structs, "unions", and arrays in many + cases. + +All of these properties are easy to satisfy for most imperative +languages, and we'll illustrate it below with Kaleidoscope. The final +question you may be asking is: should I bother with this nonsense for my +front-end? Wouldn't it be better if I just did SSA construction +directly, avoiding use of the mem2reg optimization pass? In short, we +strongly recommend that you use this technique for building SSA form, +unless there is an extremely good reason not to. Using this technique +is: + +- Proven and well tested: llvm-gcc and clang both use this technique + for local mutable variables. As such, the most common clients of LLVM + are using this to handle a bulk of their variables. You can be sure + that bugs are found fast and fixed early. +- Extremely Fast: mem2reg has a number of special cases that make it + fast in common cases as well as fully general. 
For example, it has + fast-paths for variables that are only used in a single block, + variables that only have one assignment point, good heuristics to + avoid insertion of unneeded phi nodes, etc. +- Needed for debug info generation: `Debug information in + LLVM <../SourceLevelDebugging.html>`_ relies on having the address of + the variable exposed so that debug info can be attached to it. This + technique dovetails very naturally with this style of debug info. + +If nothing else, this makes it much easier to get your front-end up and +running, and is very simple to implement. Let's extend Kaleidoscope with +mutable variables now! + +Mutable Variables in Kaleidoscope +================================= + +Now that we know the sort of problem we want to tackle, let's see what +this looks like in the context of our little Kaleidoscope language. +We're going to add two features: + +#. The ability to mutate variables with the '=' operator. +#. The ability to define new variables. + +While the first item is really what this is about, we only have +variables for incoming arguments as well as for induction variables, and +redefining those only goes so far :). Also, the ability to define new +variables is a useful thing regardless of whether you will be mutating +them. Here's a motivating example that shows how we could use these: + +:: + + # Define ':' for sequencing: as a low-precedence operator that ignores operands + # and just returns the RHS. + def binary : 1 (x y) y; + + # Recursive fib, we could do this before. + def fib(x) + if (x < 3) then + 1 + else + fib(x-1)+fib(x-2); + + # Iterative fib. + def fibi(x) + var a = 1, b = 1, c in + (for i = 3, i < x in + c = a + b : + a = b : + b = c) : + b; + + # Call it. + fibi(10); + +In order to mutate variables, we have to change our existing variables +to use the "alloca trick". Once we have that, we'll add our new +operator, then extend Kaleidoscope to support new variable definitions.
+ +Adjusting Existing Variables for Mutation +========================================= + +The symbol table in Kaleidoscope is managed at code generation time by +the '``named_values``' map. This map currently keeps track of the LLVM +"Value\*" that holds the double value for the named variable. In order +to support mutation, we need to change this slightly, so that +``named_values`` holds the *memory location* of the variable in +question. Note that this change is a refactoring: it changes the +structure of the code, but does not (by itself) change the behavior of +the compiler. All of these changes are isolated in the Kaleidoscope code +generator. + +At this point in Kaleidoscope's development, it only supports variables +for two things: incoming arguments to functions and the induction +variable of 'for' loops. For consistency, we'll allow mutation of these +variables in addition to other user-defined variables. This means that +these will both need memory locations. + +To start our transformation of Kaleidoscope, we'll change the +``named_values`` map so that it maps to AllocaInst\* instead of Value\*. +Once we do this, the C++ compiler will tell us what parts of the code we +need to update: + +**Note:** the ocaml bindings currently model both ``Value*``'s and +``AllocaInst*``'s as ``Llvm.llvalue``'s, but this may change in the future +to be more type safe. + +.. code-block:: ocaml + + let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 + +Also, since we will need to create these alloca's, we'll use a helper +function that ensures that the allocas are created in the entry block of +the function: + +.. code-block:: ocaml + + (* Create an alloca instruction in the entry block of the function. This + * is used for mutable variables etc.
*) + let create_entry_block_alloca the_function var_name = + let builder = builder_at (instr_begin (entry_block the_function)) in + build_alloca double_type var_name builder + +This funny looking code creates an ``Llvm.llbuilder`` object that is +pointing at the first instruction of the entry block. It then creates an +alloca with the expected name and returns it. Because all values in +Kaleidoscope are doubles, there is no need to pass in a type to use. + +With this in place, the first functionality change we want to make is to +variable references. In our new scheme, variables live on the stack, so +code generating a reference to them actually needs to produce a load +from the stack slot: + +.. code-block:: ocaml + + let rec codegen_expr = function + ... + | Ast.Variable name -> + let v = try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name") + in + (* Load the value. *) + build_load v name builder + +As you can see, this is pretty straightforward. Now we need to update +the things that define the variables to set up the alloca. We'll start +with ``codegen_expr Ast.For ...`` (see the `full code listing <#code>`_ +for the unabridged code): + +.. code-block:: ocaml + + | Ast.For (var_name, start, end_, step, body) -> + let the_function = block_parent (insertion_block builder) in + + (* Create an alloca for the variable in the entry block. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Store the value into the alloca. *) + ignore(build_store start_val alloca builder); + + ... + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name alloca; + + ... 
+ + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Reload, increment, and restore the alloca. This handles the case where + * the body of the loop mutates the variable. *) + let cur_var = build_load alloca var_name builder in + let next_var = build_add cur_var step_val "nextvar" builder in + ignore(build_store next_var alloca builder); + ... + +This code is virtually identical to the code `before we allowed mutable +variables `_. The big difference is that +we no longer have to construct a PHI node, and we use load/store to +access the variable as needed. + +To support mutable argument variables, we need to also make allocas for +them. The code for this is also pretty simple: + +.. code-block:: ocaml + + (* Create an alloca for each argument and register the argument in the symbol + * table so that references to it will succeed. *) + let create_argument_allocas the_function proto = + let args = match proto with + | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args + in + Array.iteri (fun i ai -> + let var_name = args.(i) in + (* Create an alloca for this variable. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Store the initial value into the alloca. *) + ignore(build_store ai alloca builder); + + (* Add arguments to variable symbol table. *) + Hashtbl.add named_values var_name alloca; + ) (params the_function) + +For each argument, we make an alloca, store the input value to the +function into the alloca, and register the alloca as the memory location +for the argument. This method gets invoked by ``Codegen.codegen_func`` +right after it sets up the entry block for the function. + +The final missing piece is adding the mem2reg pass, which allows us to +get good codegen once again: + +.. code-block:: ocaml + + let main () = + ... + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. 
Start with registering info about how the + * target lays out data structures. *) + DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Promote allocas to registers. *) + add_memory_to_register_promotion the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + +It is interesting to see what the code looks like before and after the +mem2reg optimization runs. For example, this is the before/after code +for our recursive fib function. Before the optimization: + +.. code-block:: llvm + + define double @fib(double %x) { + entry: + %x1 = alloca double + store double %x, double* %x1 + %x2 = load double* %x1 + %cmptmp = fcmp ult double %x2, 3.000000e+00 + %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 + br i1 %ifcond, label %then, label %else + + then: ; preds = %entry + br label %ifcont + + else: ; preds = %entry + %x3 = load double* %x1 + %subtmp = fsub double %x3, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) + %x4 = load double* %x1 + %subtmp5 = fsub double %x4, 2.000000e+00 + %calltmp6 = call double @fib(double %subtmp5) + %addtmp = fadd double %calltmp, %calltmp6 + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ] + ret double %iftmp + } + +Here there is only one variable (x, the input argument) but you can +still see the extremely simple-minded code generation strategy we are +using. In the entry block, an alloca is created, and the initial input +value is stored into it. Each reference to the variable does a reload +from the stack. Also, note that we didn't modify the if/then/else +expression, so it still inserts a PHI node. While we could make an +alloca for it, it is actually easier to create a PHI node for it, so we +still just make the PHI.
+ +Here is the code after the mem2reg pass runs: + +.. code-block:: llvm + + define double @fib(double %x) { + entry: + %cmptmp = fcmp ult double %x, 3.000000e+00 + %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 + br i1 %ifcond, label %then, label %else + + then: + br label %ifcont + + else: + %subtmp = fsub double %x, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) + %subtmp5 = fsub double %x, 2.000000e+00 + %calltmp6 = call double @fib(double %subtmp5) + %addtmp = fadd double %calltmp, %calltmp6 + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ] + ret double %iftmp + } + +This is a trivial case for mem2reg, since there are no redefinitions of +the variable. The point of showing this is to calm your tension about +inserting such blatant inefficiencies :). + +After the rest of the optimizers run, we get: + +.. code-block:: llvm + + define double @fib(double %x) { + entry: + %cmptmp = fcmp ult double %x, 3.000000e+00 + %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp ueq double %booltmp, 0.000000e+00 + br i1 %ifcond, label %else, label %ifcont + + else: + %subtmp = fsub double %x, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) + %subtmp5 = fsub double %x, 2.000000e+00 + %calltmp6 = call double @fib(double %subtmp5) + %addtmp = fadd double %calltmp, %calltmp6 + ret double %addtmp + + ifcont: + ret double 1.000000e+00 + } + +Here we see that the simplifycfg pass decided to clone the return +instruction into the end of the 'else' block. This allowed it to +eliminate some branches and the PHI node. + +Now that all symbol table references are updated to use stack variables, +we'll add the assignment operator. + +New Assignment Operator +======================= + +With our current framework, adding a new assignment operator is really +simple.
We will parse it just like any other binary operator, but handle +it internally (instead of allowing the user to define it). The first +step is to set a precedence: + +.. code-block:: ocaml + + let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '=' 2; + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + ... + +Now that the parser knows the precedence of the binary operator, it +takes care of all the parsing and AST generation. We just need to +implement codegen for the assignment operator. This looks like: + +.. code-block:: ocaml + + let rec codegen_expr = function + begin match op with + | '=' -> + (* Special case '=' because we don't want to emit the LHS as an + * expression. *) + let name = + match lhs with + | Ast.Variable name -> name + | _ -> raise (Error "destination of '=' must be a variable") + in + +Unlike the rest of the binary operators, our assignment operator doesn't +follow the "emit LHS, emit RHS, do computation" model. As such, it is +handled as a special case before the other binary operators are handled. +The other strange thing is that it requires the LHS to be a variable. It +is invalid to have "(x+1) = expr" - only things like "x = expr" are +allowed. + +.. code-block:: ocaml + + (* Codegen the rhs. *) + let val_ = codegen_expr rhs in + + (* Lookup the name. *) + let variable = try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name") + in + ignore(build_store val_ variable builder); + val_ + | _ -> + ... + +Once we have the variable, codegen'ing the assignment is +straightforward: we emit the RHS of the assignment, create a store, and +return the computed value. Returning a value allows for chained +assignments like "X = (Y = Z)". + +Now that we have an assignment operator, we can mutate loop variables +and arguments. 
For example, we can now run code like this: + +:: + + # Function to print a double. + extern printd(x); + + # Define ':' for sequencing: as a low-precedence operator that ignores operands + # and just returns the RHS. + def binary : 1 (x y) y; + + def test(x) + printd(x) : + x = 4 : + printd(x); + + test(123); + +When run, this example prints "123" and then "4", showing that we did +actually mutate the value! Okay, we have now officially implemented our +goal: getting this to work requires SSA construction in the general +case. However, to be really useful, we want the ability to define our +own local variables, so let's add this next! + +User-defined Local Variables +============================ + +Adding var/in is just like any of the other extensions we made to +Kaleidoscope: we extend the lexer, the parser, the AST and the code +generator. The first step for adding our new 'var/in' construct is to +extend the lexer. As before, this is pretty trivial, the code looks like +this: + +.. code-block:: ocaml + + type token = + ... + (* var definition *) + | Var + + ... + + and lex_ident buffer = parser + ... + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + | "var" -> [< 'Token.Var; stream >] + ... + +The next step is to define the AST node that we will construct. For +var/in, it looks like this: + +.. code-block:: ocaml + + type expr = + ... + (* variant for var/in. *) + | Var of (string * expr option) array * expr + ... + +var/in allows a list of names to be defined all at once, and each name +can optionally have an initializer value. As such, we capture this +information in the VarNames vector. Also, var/in has a body, this body +is allowed to access the variables defined by the var/in. + +With this in place, we can define the parser pieces. The first thing we +do is add it as a primary expression: + +..
code-block:: ocaml + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr + * ::= varexpr *) + let rec parse_primary = parser + ... + (* varexpr + * ::= 'var' identifier ('=' expression)? + * (',' identifier ('=' expression)?)* 'in' expression *) + | [< 'Token.Var; + (* At least one variable name is required. *) + 'Token.Ident id ?? "expected identifier after var"; + init=parse_var_init; + var_names=parse_var_names [(id, init)]; + (* At this point, we have to have 'in'. *) + 'Token.In ?? "expected 'in' keyword after 'var'"; + body=parse_expr >] -> + Ast.Var (Array.of_list (List.rev var_names), body) + + ... + + and parse_var_init = parser + (* read in the optional initializer. *) + | [< 'Token.Kwd '='; e=parse_expr >] -> Some e + | [< >] -> None + + and parse_var_names accumulator = parser + | [< 'Token.Kwd ','; + 'Token.Ident id ?? "expected identifier list after var"; + init=parse_var_init; + e=parse_var_names ((id, init) :: accumulator) >] -> e + | [< >] -> accumulator + +Now that we can parse and represent the code, we need to support +emission of LLVM IR for it. This code starts out with: + +.. code-block:: ocaml + + let rec codegen_expr = function + ... + | Ast.Var (var_names, body) -> + let old_bindings = ref [] in + + let the_function = block_parent (insertion_block builder) in + + (* Register all variables and emit their initializer. *) + Array.iter (fun (var_name, init) -> + +Basically it loops over all the variables, installing them one at a +time. For each variable we put into the symbol table, we remember the +previous value that we replace in old_bindings. + +.. code-block:: ocaml + + (* Emit the initializer before adding the variable to scope, this + * prevents the initializer from referencing the variable itself, and + * permits stuff like this: + * var a = 1 in + * var a = a in ... # refers to outer 'a'. *) + let init_val = + match init with + | Some init -> codegen_expr init + (* If not specified, use 0.0. 
*) + | None -> const_float double_type 0.0 + in + + let alloca = create_entry_block_alloca the_function var_name in + ignore(build_store init_val alloca builder); + + (* Remember the old variable binding so that we can restore the binding + * when we unrecurse. *) + + begin + try + let old_value = Hashtbl.find named_values var_name in + old_bindings := (var_name, old_value) :: !old_bindings; + with Not_found -> () + end; + + (* Remember this binding. *) + Hashtbl.add named_values var_name alloca; + ) var_names; + +There are more comments here than code. The basic idea is that we emit +the initializer, create the alloca, then update the symbol table to +point to it. Once all the variables are installed in the symbol table, +we evaluate the body of the var/in expression: + +.. code-block:: ocaml + + (* Codegen the body, now that all vars are in scope. *) + let body_val = codegen_expr body in + +Finally, before returning, we restore the previous variable bindings: + +.. code-block:: ocaml + + (* Pop all our variables from scope. *) + List.iter (fun (var_name, old_value) -> + Hashtbl.add named_values var_name old_value + ) !old_bindings; + + (* Return the body computation. *) + body_val + +The end result of all of this is that we get properly scoped variable +definitions, and we even (trivially) allow mutation of them :). + +With this, we have completed what we set out to do. Our nice iterative fib +example from the intro compiles and runs just fine. The mem2reg pass +optimizes all of our stack variables into SSA registers, inserting PHI +nodes where needed, and our front-end remains simple: no "iterated +dominance frontier" computation anywhere in sight. + +Full Code Listing +================= + +Here is the complete code listing for our running example, enhanced with +mutable variables and var/in support. To build this example, use: + +.. 
code-block:: bash + + # Compile + ocamlbuild toy.byte + # Run + ./toy.byte + +Here is the code: + +\_tags: + :: + + <{lexer,parser}.ml>: use_camlp4, pp(camlp4of) + <*.{byte,native}>: g++, use_llvm, use_llvm_analysis + <*.{byte,native}>: use_llvm_executionengine, use_llvm_target + <*.{byte,native}>: use_llvm_scalar_opts, use_bindings + +myocamlbuild.ml: + .. code-block:: ocaml + + open Ocamlbuild_plugin;; + + ocaml_lib ~extern:true "llvm";; + ocaml_lib ~extern:true "llvm_analysis";; + ocaml_lib ~extern:true "llvm_executionengine";; + ocaml_lib ~extern:true "llvm_target";; + ocaml_lib ~extern:true "llvm_scalar_opts";; + + flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);; + dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; + +token.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + + (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) + type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In + + (* operators *) + | Binary | Unary + + (* var definition *) + | Var + +lexer.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + + let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. 
'9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + + and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + + and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + | "var" -> [< 'Token.Var; stream >] + | id -> [< 'Token.Ident id; stream >] + + and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] + +ast.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + + (* expr - Base type for all expression nodes. *) + type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a unary operator. *) + | Unary of char * expr + + (* variant for a binary operator. 
*) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + + (* variant for var/in. *) + | Var of (string * expr option) array * expr + + (* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) + type proto = + | Prototype of string * string array + | BinOpPrototype of string * string array * int + + (* func - This type represents a function definition itself. *) + type func = Function of proto * expr + +parser.ml: + .. code-block:: ocaml + + (*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + + (* binop_precedence - This holds the precedence for each binary operator that is + * defined *) + let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + + (* precedence - Get the precedence of the pending binary operator token. *) + let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + + (* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr + * ::= varexpr *) + let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? 
"expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? "expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + (* varexpr + * ::= 'var' identifier ('=' expression? + * (',' identifier ('=' expression)?)* 'in' expression *) + | [< 'Token.Var; + (* At least one variable name is required. *) + 'Token.Ident id ?? "expected identifier after var"; + init=parse_var_init; + var_names=parse_var_names [(id, init)]; + (* At this point, we have to have 'in'. *) + 'Token.In ?? 
"expected 'in' keyword after 'var'"; + body=parse_expr >] -> + Ast.Var (Array.of_list (List.rev var_names), body) + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + + (* unary + * ::= primary + * ::= '!' unary *) + and parse_unary = parser + (* If this is a unary operator, read it. *) + | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> + Ast.Unary (op, operand) + + (* If the current token is not an operator, it must be a primary expr. *) + | [< stream >] -> parse_primary stream + + (* binoprhs + * ::= ('+' primary)* *) + and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_unary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + + and parse_var_init = parser + (* read in the optional initializer. *) + | [< 'Token.Kwd '='; e=parse_expr >] -> Some e + | [< >] -> None + + and parse_var_names accumulator = parser + | [< 'Token.Kwd ','; + 'Token.Ident id ?? 
"expected identifier list after var"; + init=parse_var_init; + e=parse_var_names ((id, init) :: accumulator) >] -> e + | [< >] -> accumulator + + (* expression + * ::= primary binoprhs *) + and parse_expr = parser + | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream + + (* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? (id) *) + let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator. 
*) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + + (* definition ::= 'def' prototype expression *) + let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + + (* toplevelexpr ::= expression *) + let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + + (* external ::= 'extern' prototype *) + let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e + +codegen.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + + open Llvm + + exception Error of string + + let context = global_context () + let the_module = create_module context "my cool jit" + let builder = builder context + let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 + let double_type = double_type context + + (* Create an alloca instruction in the entry block of the function. This + * is used for mutable variables etc. *) + let create_entry_block_alloca the_function var_name = + let builder = builder_at context (instr_begin (entry_block the_function)) in + build_alloca double_type var_name builder + + let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + let v = try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name") + in + (* Load the value. 
*) + build_load v name builder + | Ast.Unary (op, operand) -> + let operand = codegen_expr operand in + let callee = "unary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown unary operator") + in + build_call callee [|operand|] "unop" builder + | Ast.Binary (op, lhs, rhs) -> + begin match op with + | '=' -> + (* Special case '=' because we don't want to emit the LHS as an + * expression. *) + let name = + match lhs with + | Ast.Variable name -> name + | _ -> raise (Error "destination of '=' must be a variable") + in + + (* Codegen the rhs. *) + let val_ = codegen_expr rhs in + + (* Lookup the name. *) + let variable = try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name") + in + ignore(build_store val_ variable builder); + val_ + | _ -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_add lhs_val rhs_val "addtmp" builder + | '-' -> build_sub lhs_val rhs_val "subtmp" builder + | '*' -> build_mul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> + (* If it wasn't a builtin binary operator, it must be a user defined + * one. Emit a call to it. *) + let callee = "binary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "binary operator not found!") + in + build_call callee [|lhs_val; rhs_val|] "binop" builder + end + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. 
*) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. 
*) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Output this as: + * var = alloca double + * ... + * start = startexpr + * store start -> var + * goto loop + * loop: + * ... + * bodyexpr + * ... + * loopend: + * step = stepexpr + * endcond = endexpr + * + * curvar = load var + * nextvar = curvar + step + * store nextvar -> var + * br endcond, loop, endloop + * outloop: *) + + let the_function = block_parent (insertion_block builder) in + + (* Create an alloca for the variable in the entry block. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Store the value into the alloca. *) + ignore(build_store start_val alloca builder); + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name alloca; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. 
*) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Reload, increment, and restore the alloca. This handles the case where + * the body of the loop mutates the variable. *) + let cur_var = build_load alloca var_name builder in + let next_var = build_add cur_var step_val "nextvar" builder in + ignore(build_store next_var alloca builder); + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + | Ast.Var (var_names, body) -> + let old_bindings = ref [] in + + let the_function = block_parent (insertion_block builder) in + + (* Register all variables and emit their initializer. *) + Array.iter (fun (var_name, init) -> + (* Emit the initializer before adding the variable to scope, this + * prevents the initializer from referencing the variable itself, and + * permits stuff like this: + * var a = 1 in + * var a = a in ... # refers to outer 'a'. *) + let init_val = + match init with + | Some init -> codegen_expr init + (* If not specified, use 0.0. 
*) + | None -> const_float double_type 0.0 + in + + let alloca = create_entry_block_alloca the_function var_name in + ignore(build_store init_val alloca builder); + + (* Remember the old variable binding so that we can restore the binding + * when we unrecurse. *) + begin + try + let old_value = Hashtbl.find named_values var_name in + old_bindings := (var_name, old_value) :: !old_bindings; + with Not_found -> () + end; + + (* Remember this binding. *) + Hashtbl.add named_values var_name alloca; + ) var_names; + + (* Codegen the body, now that all vars are in scope. *) + let body_val = codegen_expr body in + + (* Pop all our variables from scope. *) + List.iter (fun (var_name, old_value) -> + Hashtbl.add named_values var_name old_value + ) !old_bindings; + + (* Return the body computation. *) + body_val + + let codegen_proto = function + | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + + (* Create an alloca for each argument and register the argument in the symbol + * table so that references to it will succeed. 
*) + let create_argument_allocas the_function proto = + let args = match proto with + | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args + in + Array.iteri (fun i ai -> + let var_name = args.(i) in + (* Create an alloca for this variable. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Store the initial value into the alloca. *) + ignore(build_store ai alloca builder); + + (* Add arguments to variable symbol table. *) + Hashtbl.add named_values var_name alloca; + ) (params the_function) + + let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* If this is an operator, install it. *) + begin match proto with + | Ast.BinOpPrototype (name, args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add Parser.binop_precedence op prec; + | _ -> () + end; + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + (* Add all arguments to the symbol table and create their allocas. *) + create_argument_allocas the_function proto; + + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e + +toplevel.ml: + .. 
code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + + (* top ::= definition | external | expression | ';' *) + let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream + +toy.ml: + .. code-block:: ocaml + + (*===----------------------------------------------------------------------=== + * Main driver code. 
+ *===----------------------------------------------------------------------===*) + + open Llvm + open Llvm_executionengine + open Llvm_target + open Llvm_scalar_opts + + let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '=' 2; + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Promote allocas to registers. *) + add_memory_to_register_promotion the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module + ;; + + main () + +bindings.c + .. code-block:: c + + #include <stdio.h> + + /* putchard - putchar that takes a double and returns 0. */ + extern double putchard(double X) { + putchar((char)X); + return 0; + } + + /* printd - printf that takes a double prints it as "%f\n", returning 0. 
*/ + extern double printd(double X) { + printf("%f\n", X); + return 0; + } + +`Next: Conclusion and other useful LLVM tidbits <OCamlLangImpl8.html>`_ + diff --git a/docs/tutorial/OCamlLangImpl8.html b/docs/tutorial/OCamlLangImpl8.html deleted file mode 100644 index 7c1a500a21bf..000000000000 --- a/docs/tutorial/OCamlLangImpl8.html +++ /dev/null @@ -1,359 +0,0 @@ - - - - - Kaleidoscope: Conclusion and other useful LLVM tidbits - - - - - - - -

Kaleidoscope: Conclusion and other useful LLVM tidbits

- - - - -
-

Written by Chris Lattner

-
- - -

Tutorial Conclusion

- - -
- -

Welcome to the final chapter of the "Implementing a -language with LLVM" tutorial. In the course of this tutorial, we have grown -our little Kaleidoscope language from being a useless toy, to being a -semi-interesting (but probably still useless) toy. :)

- -

It is interesting to see how far we've come, and how little code it has -taken. We built the entire lexer, parser, AST, code generator, and an -interactive run-loop (with a JIT!) by-hand in under 700 lines of -(non-comment/non-blank) code.

- -

Our little language supports a couple of interesting features: it supports -user defined binary and unary operators, it uses JIT compilation for immediate -evaluation, and it supports a few control flow constructs with SSA construction. -

- -

Part of the idea of this tutorial was to show you how easy and fun it can be -to define, build, and play with languages. Building a compiler need not be a -scary or mystical process! Now that you've seen some of the basics, I strongly -encourage you to take the code and hack on it. For example, try adding:

- -
    -
  • global variables - While global variables have questional value in -modern software engineering, they are often useful when putting together quick -little hacks like the Kaleidoscope compiler itself. Fortunately, our current -setup makes it very easy to add global variables: just have value lookup check -to see if an unresolved variable is in the global variable symbol table before -rejecting it. To create a new global variable, make an instance of the LLVM -GlobalVariable class.
  • - -
  • typed variables - Kaleidoscope currently only supports variables of -type double. This gives the language a very nice elegance, because only -supporting one type means that you never have to specify types. Different -languages have different ways of handling this. The easiest way is to require -the user to specify types for every variable definition, and record the type -of the variable in the symbol table along with its Value*.
  • - -
  • arrays, structs, vectors, etc - Once you add types, you can start -extending the type system in all sorts of interesting ways. Simple arrays are -very easy and are quite useful for many different applications. Adding them is -mostly an exercise in learning how the LLVM getelementptr instruction works: it -is so nifty/unconventional, it has its own FAQ! If you add support -for recursive types (e.g. linked lists), make sure to read the section in the LLVM -Programmer's Manual that describes how to construct them.
  • - -
  • standard runtime - Our current language allows the user to access -arbitrary external functions, and we use it for things like "printd" and -"putchard". As you extend the language to add higher-level constructs, often -these constructs make the most sense if they are lowered to calls into a -language-supplied runtime. For example, if you add hash tables to the language, -it would probably make sense to add the routines to a runtime, instead of -inlining them all the way.
  • - -
  • memory management - Currently we can only access the stack in -Kaleidoscope. It would also be useful to be able to allocate heap memory, -either with calls to the standard libc malloc/free interface or with a garbage -collector. If you would like to use garbage collection, note that LLVM fully -supports Accurate Garbage Collection -including algorithms that move objects and need to scan/update the stack.
  • - -
  • debugger support - LLVM supports generation of DWARF Debug info which is understood by -common debuggers like GDB. Adding support for debug info is fairly -straightforward. The best way to understand it is to compile some C/C++ code -with "llvm-gcc -g -O0" and taking a look at what it produces.
  • - -
  • exception handling support - LLVM supports generation of zero cost exceptions which interoperate -with code compiled in other languages. You could also generate code by -implicitly making every function return an error value and checking it. You -could also make explicit use of setjmp/longjmp. There are many different ways -to go here.
  • - -
  • object orientation, generics, database access, complex numbers, -geometric programming, ... - Really, there is -no end of crazy features that you can add to the language.
  • - -
  • unusual domains - We've been talking about applying LLVM to a domain -that many people are interested in: building a compiler for a specific language. -However, there are many other domains that can use compiler technology that are -not typically considered. For example, LLVM has been used to implement OpenGL -graphics acceleration, translate C++ code to ActionScript, and many other -cute and clever things. Maybe you will be the first to JIT compile a regular -expression interpreter into native code with LLVM?
  • - -
- -

-Have fun - try doing something crazy and unusual. Building a language like -everyone else always has, is much less fun than trying something a little crazy -or off the wall and seeing how it turns out. If you get stuck or want to talk -about it, feel free to email the llvmdev mailing -list: it has lots of people who are interested in languages and are often -willing to help out. -

- -

Before we end this tutorial, I want to talk about some "tips and tricks" for generating -LLVM IR. These are some of the more subtle things that may not be obvious, but -are very useful if you want to take advantage of LLVM's capabilities.

- -
- - -

Properties of the LLVM IR

- - -
- -

We have a couple common questions about code in the LLVM IR form - lets just -get these out of the way right now, shall we?

- - -

Target Independence

- - -
- -

Kaleidoscope is an example of a "portable language": any program written in -Kaleidoscope will work the same way on any target that it runs on. Many other -languages have this property, e.g. lisp, java, haskell, javascript, python, etc -(note that while these languages are portable, not all their libraries are).

- -

One nice aspect of LLVM is that it is often capable of preserving target -independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled -program and run it on any target that LLVM supports, even emitting C code and -compiling that on targets that LLVM doesn't support natively. You can trivially -tell that the Kaleidoscope compiler generates target-independent code because it -never queries for any target-specific information when generating code.

- -

The fact that LLVM provides a compact, target-independent, representation for -code gets a lot of people excited. Unfortunately, these people are usually -thinking about C or a language from the C family when they are asking questions -about language portability. I say "unfortunately", because there is really no -way to make (fully general) C code portable, other than shipping the source code -around (and of course, C source code is not actually portable in general -either - ever port a really old application from 32- to 64-bits?).

- -

The problem with C (again, in its full generality) is that it is heavily -laden with target specific assumptions. As one simple example, the preprocessor -often destructively removes target-independence from the code when it processes -the input text:

- -
-
-#ifdef __i386__
-  int X = 1;
-#else
-  int X = 42;
-#endif
-
-
- -

While it is possible to engineer more and more complex solutions to problems -like this, it cannot be solved in full generality in a way that is better than shipping -the actual source code.

- -

That said, there are interesting subsets of C that can be made portable. If -you are willing to fix primitive types to a fixed size (say int = 32-bits, -and long = 64-bits), don't care about ABI compatibility with existing binaries, -and are willing to give up some other minor features, you can have portable -code. This can make sense for specialized domains such as an -in-kernel language.

- -
- - -

Safety Guarantees

- - -
- -

Many of the languages above are also "safe" languages: it is impossible for -a program written in Java to corrupt its address space and crash the process -(assuming the JVM has no bugs). -Safety is an interesting property that requires a combination of language -design, runtime support, and often operating system support.

- -

It is certainly possible to implement a safe language in LLVM, but LLVM IR -does not itself guarantee safety. The LLVM IR allows unsafe pointer casts, -use after free bugs, buffer over-runs, and a variety of other problems. Safety -needs to be implemented as a layer on top of LLVM and, conveniently, several -groups have investigated this. Ask on the llvmdev mailing -list if you are interested in more details.

- -
- - -

Language-Specific Optimizations

- - -
- -

One thing about LLVM that turns off many people is that it does not solve all -the world's problems in one system (sorry 'world hunger', someone else will have -to solve you some other day). One specific complaint is that people perceive -LLVM as being incapable of performing high-level language-specific optimization: -LLVM "loses too much information".

- -

Unfortunately, this is really not the place to give you a full and unified -version of "Chris Lattner's theory of compiler design". Instead, I'll make a -few observations:

- -

First, you're right that LLVM does lose information. For example, as of this -writing, there is no way to distinguish in the LLVM IR whether an SSA-value came -from a C "int" or a C "long" on an ILP32 machine (other than debug info). Both -get compiled down to an 'i32' value and the information about what it came from -is lost. The more general issue here, is that the LLVM type system uses -"structural equivalence" instead of "name equivalence". Another place this -surprises people is if you have two types in a high-level language that have the -same structure (e.g. two different structs that have a single int field): these -types will compile down into a single LLVM type and it will be impossible to -tell what it came from.

- -

Second, while LLVM does lose information, LLVM is not a fixed target: we -continue to enhance and improve it in many different ways. In addition to -adding new features (LLVM did not always support exceptions or debug info), we -also extend the IR to capture important information for optimization (e.g. -whether an argument is sign or zero extended, information about pointers -aliasing, etc). Many of the enhancements are user-driven: people want LLVM to -include some specific feature, so they go ahead and extend it.

- -

Third, it is possible and easy to add language-specific -optimizations, and you have a number of choices in how to do it. As one trivial -example, it is easy to add language-specific optimization passes that -"know" things about code compiled for a language. In the case of the C family, -there is an optimization pass that "knows" about the standard C library -functions. If you call "exit(0)" in main(), it knows that it is safe to -optimize that into "return 0;" because C specifies what the 'exit' -function does.

- -

In addition to simple library knowledge, it is possible to embed a variety of -other language-specific information into the LLVM IR. If you have a specific -need and run into a wall, please bring the topic up on the llvmdev list. At the -very worst, you can always treat LLVM as if it were a "dumb code generator" and -implement the high-level optimizations you desire in your front-end, on the -language-specific AST. -

- -
- -
- - -

Tips and Tricks

- - -
- -

There is a variety of useful tips and tricks that you come to know after -working on/with LLVM that aren't obvious at first glance. Instead of letting -everyone rediscover them, this section talks about some of these issues.

- - -

Implementing portable offsetof/sizeof

- - -
- -

One interesting thing that comes up, if you are trying to keep the code -generated by your compiler "target independent", is that you often need to know -the size of some LLVM type or the offset of some field in an llvm structure. -For example, you might need to pass the size of a type into a function that -allocates memory.

- -

Unfortunately, this can vary widely across targets: for example the width of -a pointer is trivially target-specific. However, there is a clever -way to use the getelementptr instruction that allows you to compute this -in a portable way.

- -
- - -

Garbage Collected Stack Frames

- - -
- -

Some languages want to explicitly manage their stack frames, often so that -they are garbage collected or to allow easy implementation of closures. There -are often better ways to implement these features than explicit stack frames, -but LLVM -does support them, if you want. It requires your front-end to convert the -code into Continuation -Passing Style and the use of tail calls (which LLVM also supports).

- -
- -
- - -
-
- Valid CSS! - Valid HTML 4.01! - - Chris Lattner
- The LLVM Compiler Infrastructure
- Last modified: $Date$ -
- - diff --git a/docs/tutorial/OCamlLangImpl8.rst b/docs/tutorial/OCamlLangImpl8.rst new file mode 100644 index 000000000000..3534b2e0c931 --- /dev/null +++ b/docs/tutorial/OCamlLangImpl8.rst @@ -0,0 +1,267 @@ +====================================================== +Kaleidoscope: Conclusion and other useful LLVM tidbits +====================================================== + +.. contents:: + :local: + +Tutorial Conclusion +=================== + +Welcome to the final chapter of the "`Implementing a language with +LLVM `_" tutorial. In the course of this tutorial, we have +grown our little Kaleidoscope language from being a useless toy, to +being a semi-interesting (but probably still useless) toy. :) + +It is interesting to see how far we've come, and how little code it has +taken. We built the entire lexer, parser, AST, code generator, and an +interactive run-loop (with a JIT!) by-hand in under 700 lines of +(non-comment/non-blank) code. + +Our little language supports a couple of interesting features: it +supports user defined binary and unary operators, it uses JIT +compilation for immediate evaluation, and it supports a few control flow +constructs with SSA construction. + +Part of the idea of this tutorial was to show you how easy and fun it +can be to define, build, and play with languages. Building a compiler +need not be a scary or mystical process! Now that you've seen some of +the basics, I strongly encourage you to take the code and hack on it. +For example, try adding: + +- **global variables** - While global variables have questionable value + in modern software engineering, they are often useful when putting + together quick little hacks like the Kaleidoscope compiler itself. + Fortunately, our current setup makes it very easy to add global + variables: just have value lookup check to see if an unresolved + variable is in the global variable symbol table before rejecting it. 
+ To create a new global variable, make an instance of the LLVM + ``GlobalVariable`` class. +- **typed variables** - Kaleidoscope currently only supports variables + of type double. This gives the language a very nice elegance, because + only supporting one type means that you never have to specify types. + Different languages have different ways of handling this. The easiest + way is to require the user to specify types for every variable + definition, and record the type of the variable in the symbol table + along with its Value\*. +- **arrays, structs, vectors, etc** - Once you add types, you can start + extending the type system in all sorts of interesting ways. Simple + arrays are very easy and are quite useful for many different + applications. Adding them is mostly an exercise in learning how the + LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction + works: it is so nifty/unconventional, it `has its own + FAQ <../GetElementPtr.html>`_! If you add support for recursive types + (e.g. linked lists), make sure to read the `section in the LLVM + Programmer's Manual <../ProgrammersManual.html#TypeResolve>`_ that + describes how to construct them. +- **standard runtime** - Our current language allows the user to access + arbitrary external functions, and we use it for things like "printd" + and "putchard". As you extend the language to add higher-level + constructs, often these constructs make the most sense if they are + lowered to calls into a language-supplied runtime. For example, if + you add hash tables to the language, it would probably make sense to + add the routines to a runtime, instead of inlining them all the way. +- **memory management** - Currently we can only access the stack in + Kaleidoscope. It would also be useful to be able to allocate heap + memory, either with calls to the standard libc malloc/free interface + or with a garbage collector. 
If you would like to use garbage + collection, note that LLVM fully supports `Accurate Garbage + Collection <../GarbageCollection.html>`_ including algorithms that + move objects and need to scan/update the stack. +- **debugger support** - LLVM supports generation of `DWARF Debug + info <../SourceLevelDebugging.html>`_ which is understood by common + debuggers like GDB. Adding support for debug info is fairly + straightforward. The best way to understand it is to compile some + C/C++ code with "``llvm-gcc -g -O0``" and take a look at what it + produces. +- **exception handling support** - LLVM supports generation of `zero + cost exceptions <../ExceptionHandling.html>`_ which interoperate with + code compiled in other languages. You could also generate code by + implicitly making every function return an error value and checking + it. You could also make explicit use of setjmp/longjmp. There are + many different ways to go here. +- **object orientation, generics, database access, complex numbers, + geometric programming, ...** - Really, there is no end of crazy + features that you can add to the language. +- **unusual domains** - We've been talking about applying LLVM to a + domain that many people are interested in: building a compiler for a + specific language. However, there are many other domains that can use + compiler technology that are not typically considered. For example, + LLVM has been used to implement OpenGL graphics acceleration, + translate C++ code to ActionScript, and many other cute and clever + things. Maybe you will be the first to JIT compile a regular + expression interpreter into native code with LLVM? + +Have fun - try doing something crazy and unusual. Building a language +like everyone else always has, is much less fun than trying something a +little crazy or off the wall and seeing how it turns out. 
If you get +stuck or want to talk about it, feel free to email the `llvmdev mailing +list `_: it has lots +of people who are interested in languages and are often willing to help +out. + +Before we end this tutorial, I want to talk about some "tips and tricks" +for generating LLVM IR. These are some of the more subtle things that +may not be obvious, but are very useful if you want to take advantage of +LLVM's capabilities. + +Properties of the LLVM IR +========================= + +We have a couple common questions about code in the LLVM IR form - let's +just get these out of the way right now, shall we? + +Target Independence +------------------- + +Kaleidoscope is an example of a "portable language": any program written +in Kaleidoscope will work the same way on any target that it runs on. +Many other languages have this property, e.g. lisp, java, haskell, +javascript, python, etc (note that while these languages are portable, +not all their libraries are). + +One nice aspect of LLVM is that it is often capable of preserving target +independence in the IR: you can take the LLVM IR for a +Kaleidoscope-compiled program and run it on any target that LLVM +supports, even emitting C code and compiling that on targets that LLVM +doesn't support natively. You can trivially tell that the Kaleidoscope +compiler generates target-independent code because it never queries for +any target-specific information when generating code. + +The fact that LLVM provides a compact, target-independent, +representation for code gets a lot of people excited. Unfortunately, +these people are usually thinking about C or a language from the C +family when they are asking questions about language portability. I say +"unfortunately", because there is really no way to make (fully general) +C code portable, other than shipping the source code around (and of +course, C source code is not actually portable in general either - ever +port a really old application from 32- to 64-bits?). 
+ +The problem with C (again, in its full generality) is that it is heavily +laden with target specific assumptions. As one simple example, the +preprocessor often destructively removes target-independence from the +code when it processes the input text: + +.. code-block:: c + + #ifdef __i386__ + int X = 1; + #else + int X = 42; + #endif + +While it is possible to engineer more and more complex solutions to +problems like this, it cannot be solved in full generality in a way that +is better than shipping the actual source code. + +That said, there are interesting subsets of C that can be made portable. +If you are willing to fix primitive types to a fixed size (say int = +32-bits, and long = 64-bits), don't care about ABI compatibility with +existing binaries, and are willing to give up some other minor features, +you can have portable code. This can make sense for specialized domains +such as an in-kernel language. + +Safety Guarantees +----------------- + +Many of the languages above are also "safe" languages: it is impossible +for a program written in Java to corrupt its address space and crash the +process (assuming the JVM has no bugs). Safety is an interesting +property that requires a combination of language design, runtime +support, and often operating system support. + +It is certainly possible to implement a safe language in LLVM, but LLVM +IR does not itself guarantee safety. The LLVM IR allows unsafe pointer +casts, use after free bugs, buffer over-runs, and a variety of other +problems. Safety needs to be implemented as a layer on top of LLVM and, +conveniently, several groups have investigated this. Ask on the `llvmdev +mailing list `_ if +you are interested in more details. + +Language-Specific Optimizations +------------------------------- + +One thing about LLVM that turns off many people is that it does not +solve all the world's problems in one system (sorry 'world hunger', +someone else will have to solve you some other day). 
One specific +complaint is that people perceive LLVM as being incapable of performing +high-level language-specific optimization: LLVM "loses too much +information". + +Unfortunately, this is really not the place to give you a full and +unified version of "Chris Lattner's theory of compiler design". Instead, +I'll make a few observations: + +First, you're right that LLVM does lose information. For example, as of +this writing, there is no way to distinguish in the LLVM IR whether an +SSA-value came from a C "int" or a C "long" on an ILP32 machine (other +than debug info). Both get compiled down to an 'i32' value and the +information about what it came from is lost. The more general issue +here, is that the LLVM type system uses "structural equivalence" instead +of "name equivalence". Another place this surprises people is if you +have two types in a high-level language that have the same structure +(e.g. two different structs that have a single int field): these types +will compile down into a single LLVM type and it will be impossible to +tell what it came from. + +Second, while LLVM does lose information, LLVM is not a fixed target: we +continue to enhance and improve it in many different ways. In addition +to adding new features (LLVM did not always support exceptions or debug +info), we also extend the IR to capture important information for +optimization (e.g. whether an argument is sign or zero extended, +information about pointers aliasing, etc). Many of the enhancements are +user-driven: people want LLVM to include some specific feature, so they +go ahead and extend it. + +Third, it is *possible and easy* to add language-specific optimizations, +and you have a number of choices in how to do it. As one trivial +example, it is easy to add language-specific optimization passes that +"know" things about code compiled for a language. In the case of the C +family, there is an optimization pass that "knows" about the standard C +library functions. 
If you call "exit(0)" in main(), it knows that it is +safe to optimize that into "return 0;" because C specifies what the +'exit' function does. + +In addition to simple library knowledge, it is possible to embed a +variety of other language-specific information into the LLVM IR. If you +have a specific need and run into a wall, please bring the topic up on +the llvmdev list. At the very worst, you can always treat LLVM as if it +were a "dumb code generator" and implement the high-level optimizations +you desire in your front-end, on the language-specific AST. + +Tips and Tricks +=============== + +There is a variety of useful tips and tricks that you come to know after +working on/with LLVM that aren't obvious at first glance. Instead of +letting everyone rediscover them, this section talks about some of these +issues. + +Implementing portable offsetof/sizeof +------------------------------------- + +One interesting thing that comes up, if you are trying to keep the code +generated by your compiler "target independent", is that you often need +to know the size of some LLVM type or the offset of some field in an +llvm structure. For example, you might need to pass the size of a type +into a function that allocates memory. + +Unfortunately, this can vary widely across targets: for example the +width of a pointer is trivially target-specific. However, there is a +`clever way to use the getelementptr +instruction `_ +that allows you to compute this in a portable way. + +Garbage Collected Stack Frames +------------------------------ + +Some languages want to explicitly manage their stack frames, often so +that they are garbage collected or to allow easy implementation of +closures. There are often better ways to implement these features than +explicit stack frames, but `LLVM does support +them, `_ +if you want. It requires your front-end to convert the code into +`Continuation Passing +Style `_ and +the use of tail calls (which LLVM also supports). 
+ diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html deleted file mode 100644 index 2c11a9a48b35..000000000000 --- a/docs/tutorial/index.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - LLVM Tutorial: Table of Contents - - - - - - - - -

LLVM Tutorial: Table of Contents

- -
    -
  1. Kaleidoscope: Implementing a Language with LLVM -
      -
    1. Tutorial Introduction and the Lexer
    2. -
    3. Implementing a Parser and AST
    4. -
    5. Implementing Code Generation to LLVM IR
    6. -
    7. Adding JIT and Optimizer Support
    8. -
    9. Extending the language: control flow
    10. -
    11. Extending the language: user-defined operators
    12. -
    13. Extending the language: mutable variables / SSA construction
    14. -
    15. Conclusion and other useful LLVM tidbits
    16. -
  2. -
  3. Kaleidoscope: Implementing a Language with LLVM in Objective Caml -
      -
    1. Tutorial Introduction and the Lexer
    2. -
    3. Implementing a Parser and AST
    4. -
    5. Implementing Code Generation to LLVM IR
    6. -
    7. Adding JIT and Optimizer Support
    8. -
    9. Extending the language: control flow
    10. -
    11. Extending the language: user-defined operators
    12. -
    13. Extending the language: mutable variables / SSA construction
    14. -
    15. Conclusion and other useful LLVM tidbits
    16. -
  4. -
  5. Advanced Topics -
      -
    1. Writing - an Optimization for LLVM
    2. -
  6. -
- - - diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst new file mode 100644 index 000000000000..69a9aee0962a --- /dev/null +++ b/docs/tutorial/index.rst @@ -0,0 +1,43 @@ +================================ +LLVM Tutorial: Table of Contents +================================ + +Kaleidoscope: Implementing a Language with LLVM +=============================================== + +.. toctree:: + :titlesonly: + :glob: + :numbered: + + LangImpl* + +Kaleidoscope: Implementing a Language with LLVM in Objective Caml +================================================================= + +.. toctree:: + :titlesonly: + :glob: + :numbered: + + OCamlLangImpl* + +External Tutorials +================== + +`Tutorial: Creating an LLVM Backend for the Cpu0 Architecture `_ + A step-by-step tutorial for developing an LLVM backend. Under + active development at ``_ (please + contribute!). + +`Howto: Implementing LLVM Integrated Assembler`_ + A simple guide for how to implement an LLVM integrated assembler for an + architecture. + +.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html + +Advanced Topics +=============== + +#. `Writing an Optimization for LLVM `_ + diff --git a/docs/userguides.rst b/docs/userguides.rst deleted file mode 100644 index 8c1554dfce9c..000000000000 --- a/docs/userguides.rst +++ /dev/null @@ -1,104 +0,0 @@ -.. _userguides: - -User Guides -=========== - -.. toctree:: - :hidden: - - CMake - HowToBuildOnARM - CommandGuide/index - DeveloperPolicy - GettingStarted - GettingStartedVS - FAQ - Lexicon - Packaging - HowToAddABuilder - yaml2obj - HowToSubmitABug - SphinxQuickstartTemplate - Phabricator - -* :ref:`getting_started` - - Discusses how to get up and running quickly with the LLVM infrastructure. - Everything from unpacking and compilation of the distribution to execution - of some tools. 
- -* :ref:`building-with-cmake` - - An addendum to the main Getting Started guide for those using the `CMake - build system `_. - -* :ref:`how_to_build_on_arm` - - Notes on building and testing LLVM/Clang on ARM. - -* `Getting Started with the LLVM System using Microsoft Visual Studio - `_ - - An addendum to the main Getting Started guide for those using Visual Studio - on Windows. - -* `LLVM Tutorial `_ - - A walk through the process of using LLVM for a custom language, and the - facilities LLVM offers in tutorial form. - -* :ref:`developer_policy` - - The LLVM project's policy towards developers and their contributions. - -* :ref:`LLVM Command Guide ` - - A reference manual for the LLVM command line utilities ("man" pages for LLVM - tools). - -* `LLVM's Analysis and Transform Passes `_ - - A list of optimizations and analyses implemented in LLVM. - -* :ref:`faq` - - A list of common questions and problems and their solutions. - -* `Release notes for the current release `_ - - This describes new features, known bugs, and other limitations. - -* :ref:`how-to-submit-a-bug-report` - - Instructions for properly submitting information about any bugs you run into - in the LLVM system. -* :doc:`SphinxQuickstartTemplate` - - A template + tutorial for writing new Sphinx documentation. It is meant - to be read in source form. - -* `LLVM Testing Infrastructure Guide `_ - - A reference manual for using the LLVM testing infrastructure. - -* `How to build the C, C++, ObjC, and ObjC++ front end `_ - - Instructions for building the clang front-end from source. - -* :ref:`packaging` - - Advice on packaging LLVM into a distribution. - -* :ref:`lexicon` - - Definition of acronyms, terms and concepts used in LLVM. - -* :ref:`how_to_add_a_builder` - - Instructions for adding new builder to LLVM buildbot master. - -* **IRC** -- You can probably find help on the unofficial LLVM IRC. - - We often are on irc.oftc.net in the #llvm channel. 
If you are using the - mozilla browser, and have chatzilla installed, you can `join #llvm on - irc.oftc.net `_. diff --git a/docs/yaml2obj.rst b/docs/yaml2obj.rst index d051e7e22c00..b269806e06f6 100644 --- a/docs/yaml2obj.rst +++ b/docs/yaml2obj.rst @@ -1,5 +1,3 @@ -.. _yaml2obj: - yaml2obj ======== diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp index b002d1f496d2..f8129b819e3a 100644 --- a/examples/BrainF/BrainF.cpp +++ b/examples/BrainF/BrainF.cpp @@ -24,10 +24,10 @@ //===--------------------------------------------------------------------===// #include "BrainF.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include using namespace llvm; diff --git a/examples/BrainF/BrainF.h b/examples/BrainF/BrainF.h index c069feb51e72..15e9e0847141 100644 --- a/examples/BrainF/BrainF.h +++ b/examples/BrainF/BrainF.h @@ -15,9 +15,9 @@ #ifndef BRAINF_H #define BRAINF_H -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" using namespace llvm; diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp index 58617b7f3809..cd6eabfdffaa 100644 --- a/examples/BrainF/BrainFDriver.cpp +++ b/examples/BrainF/BrainFDriver.cpp @@ -25,17 +25,17 @@ //===--------------------------------------------------------------------===// #include "BrainF.h" -#include "llvm/Constants.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" -#include #include +#include 
using namespace llvm; //Command line options diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index 215cb4d3714f..264ef5481f57 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -41,27 +41,27 @@ // Cases -1 and 7 are caught by a C++ test harness where the validity of // of a C++ catch(...) clause catching a generated exception with a // type info type of 7 is explained by: example in rules 1.6.4 in -// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22) +// http://mentorembedded.github.com/cxx-abi/abi-eh.html (v1.22) // // This code uses code from the llvm compiler-rt project and the llvm // Kaleidoscope project. // //===----------------------------------------------------------------------===// -#include "llvm/LLVMContext.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" -#include "llvm/IRBuilder.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" -#include "llvm/Intrinsics.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" // FIXME: Although all systems tested with (Linux, OS X), do not need this // header file included. 
A user on ubuntu reported, undefined symbols @@ -82,7 +82,7 @@ #endif // System C++ ABI unwind types from: -// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22) +// http://mentorembedded.github.com/cxx-abi/abi-eh.html (v1.22) extern "C" { @@ -151,7 +151,7 @@ struct OurExceptionType_t { /// /// Note: The above unwind.h defines struct _Unwind_Exception to be aligned /// on a double word boundary. This is necessary to match the standard: -/// http://refspecs.freestandards.org/abi-eh-1.21.html +/// http://mentorembedded.github.com/cxx-abi/abi-eh.html struct OurBaseException_t { struct OurExceptionType_t type; @@ -339,7 +339,7 @@ void deleteOurException(OurUnwindException *expToDelete) { /// This function is the struct _Unwind_Exception API mandated delete function /// used by foreign exception handlers when deleting our exception /// (OurException), instances. -/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html +/// @param reason @link http://mentorembedded.github.com/cxx-abi/abi-eh.html /// @unlink /// @param expToDelete exception instance to delete void deleteFromUnwindOurException(_Unwind_Reason_Code reason, @@ -512,7 +512,7 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) { /// are supported. Filters are not supported. /// See Variable Length Data in: /// @link http://dwarfstd.org/Dwarf3.pdf @unlink -/// Also see @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// Also see @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink /// @param resultAction reference variable which will be set with result /// @param classInfo our array of type info pointers (to globals) /// @param actionEntry index into above type info array or 0 (clean up). @@ -599,7 +599,7 @@ static bool handleActionValue(int64_t *resultAction, /// Deals with the Language specific data portion of the emitted dwarf code. 
-/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// See @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink /// @param version unsupported (ignored), unwind version /// @param lsda language specific data area /// @param _Unwind_Action actions minimally supported unwind stage @@ -667,8 +667,6 @@ static _Unwind_Reason_Code handleLsda(int version, const uint8_t *actionTableStart = callSiteTableEnd; const uint8_t *callSitePtr = callSiteTableStart; - bool foreignException = false; - while (callSitePtr < callSiteTableEnd) { uintptr_t start = readEncodedPointer(&callSitePtr, callSiteEncoding); @@ -684,7 +682,6 @@ static _Unwind_Reason_Code handleLsda(int version, // We have been notified of a foreign exception being thrown, // and we therefore need to execute cleanup landing pads actionEntry = 0; - foreignException = true; } if (landingPad == 0) { @@ -786,7 +783,7 @@ static _Unwind_Reason_Code handleLsda(int version, /// This is the personality function which is embedded (dwarf emitted), in the /// dwarf unwind info block. Again see: JITDwarfEmitter.cpp. -/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// See @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink /// @param version unsupported (ignored), unwind version /// @param _Unwind_Action actions minimally supported unwind stage /// (forced specifically not supported) @@ -834,7 +831,7 @@ _Unwind_Reason_Code ourPersonality(int version, /// Generates our _Unwind_Exception class from a given character array. /// thereby handling arbitrary lengths (not in standard), and handling /// embedded \0s. -/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// See @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink /// @param classChars char array to encode. NULL values not checkedf /// @param classCharsSize number of chars in classChars. Value is not checked. 
/// @returns class value @@ -1595,7 +1592,7 @@ void runExceptionThrow(llvm::ExecutionEngine *engine, catch (...) { // Catch all exceptions including our generated ones. This latter // functionality works according to the example in rules 1.6.4 of - // http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22), + // http://mentorembedded.github.com/cxx-abi/abi-eh.html (v1.22), // given that these will be exceptions foreign to C++ // (the _Unwind_Exception::exception_class should be different from // the one used by C++). @@ -1687,7 +1684,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos, std::vector structVals; llvm::Constant *nextStruct; - llvm::GlobalVariable *nextGlobal = NULL; // Generate each type info // @@ -1702,7 +1698,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos, typeInfoName = typeInfoNameBuilder.str(); // Note: Does not seem to work without allocation - nextGlobal = new llvm::GlobalVariable(module, ourTypeInfoType, true, diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index 417ad6f4b602..8cbf7d159fc5 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -23,17 +23,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/ExecutionEngine/JIT.h" -#include "llvm/ExecutionEngine/Interpreter.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" using namespace 
llvm; static Function *CreateFibFunction(Module *M, LLVMContext &Context) { diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp index 5588e923df83..7125a1561045 100644 --- a/examples/HowToUseJIT/HowToUseJIT.cpp +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -34,17 +34,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/ExecutionEngine/JIT.h" -#include "llvm/ExecutionEngine/Interpreter.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/Support/TargetSelect.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp index f4f09d0b351a..1cf6caacb6af 100644 --- a/examples/Kaleidoscope/Chapter2/toy.cpp +++ b/examples/Kaleidoscope/Chapter2/toy.cpp @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include //===----------------------------------------------------------------------===// diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp index c1e34b2f09ad..48cfbe6decb2 100644 --- a/examples/Kaleidoscope/Chapter3/toy.cpp +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -1,11 +1,11 @@ -#include "llvm/DerivedTypes.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/IR/DerivedTypes.h" +#include 
"llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include -#include #include +#include #include using namespace llvm; diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index bc6028c900e7..971a7c68b217 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -1,18 +1,18 @@ -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/DataLayout.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" #include -#include #include +#include #include using namespace llvm; diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp index 2b0b9d54feb8..5558d08e1d0c 100644 --- a/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -1,18 +1,18 @@ -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/DataLayout.h" -#include 
"llvm/Transforms/Scalar.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" #include -#include #include +#include #include using namespace llvm; diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index b751e3516bf8..52926eb99f17 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -1,18 +1,18 @@ -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/DataLayout.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" #include -#include #include +#include #include using namespace llvm; diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index 0ac099659064..ba192d6243cd 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -1,18 +1,18 @@ -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/DataLayout.h" -#include 
"llvm/Transforms/Scalar.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" #include -#include #include +#include #include using namespace llvm; diff --git a/examples/ModuleMaker/ModuleMaker.cpp b/examples/ModuleMaker/ModuleMaker.cpp index 6bc52c12a034..c931972f5b60 100644 --- a/examples/ModuleMaker/ModuleMaker.cpp +++ b/examples/ModuleMaker/ModuleMaker.cpp @@ -13,12 +13,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp index 305cf1dde06f..64a388695ff2 100644 --- a/examples/ParallelJIT/ParallelJIT.cpp +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -17,17 +17,17 @@ // call into the JIT at the same time (or the best possible approximation of the // same time). This test had assertion errors until I got the locking right. 
-#include -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/ExecutionEngine/JIT.h" -#include "llvm/ExecutionEngine/Interpreter.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetSelect.h" #include +#include using namespace llvm; static Function* createAdd1(Module *M) { diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 620d0887be73..e85fb9750503 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -21,8 +21,8 @@ /* Need these includes to support the LLVM 'cast' template for the C++ 'wrap' and 'unwrap' conversion functions. */ -#include "llvm/IRBuilder.h" -#include "llvm/Module.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/PassRegistry.h" extern "C" { @@ -173,10 +173,11 @@ typedef enum { LLVMUWTable = 1 << 30, LLVMNonLazyBind = 1 << 31 - /* FIXME: This attribute is currently not included in the C API as + /* FIXME: These attributes are currently not included in the C API as a temporary measure until the API/ABI impact to the C API is understood and the path forward agreed upon. - LLVMAddressSafety = 1ULL << 32 + LLVMAddressSafety = 1ULL << 32, + LLVMStackProtectStrongAttribute = 1ULL<<33 */ } LLVMAttribute; @@ -357,6 +358,11 @@ typedef enum { void LLVMInitializeCore(LLVMPassRegistryRef R); +/** Deallocate and destroy all ManagedStatic variables. 
+ @see llvm::llvm_shutdown + @see ManagedStatic */ +void LLVMShutdown(); + /*===-- Error handling ----------------------------------------------------===*/ @@ -2547,6 +2553,13 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, char **OutMessage); LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, char **OutMessage); +LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(const char *InputData, + size_t InputDataLength, + const char *BufferName, + LLVMBool RequiresNullTerminator); +LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(const char *InputData, + size_t InputDataLength, + const char *BufferName); void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf); /** @@ -2614,6 +2627,34 @@ LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM); @see llvm::PassManagerBase::~PassManagerBase. */ void LLVMDisposePassManager(LLVMPassManagerRef PM); +/** + * @} + */ + +/** + * @defgroup LLVMCCoreThreading Threading + * + * Handle the structures needed to make LLVM safe for multithreading. + * + * @{ + */ + +/** Allocate and initialize structures needed to make LLVM safe for + multithreading. The return value indicates whether multithreaded + initialization succeeded. Must be executed in isolation from all + other LLVM api calls. + @see llvm::llvm_start_multithreaded */ +LLVMBool LLVMStartMultithreaded(); + +/** Deallocate structures necessary to make LLVM safe for multithreading. + Must be executed in isolation from all other LLVM api calls. + @see llvm::llvm_stop_multithreaded */ +void LLVMStopMultithreaded(); + +/** Check whether LLVM is executing in thread-safe mode or not. 
+ @see llvm::llvm_is_multithreaded */ +LLVMBool LLVMIsMultithreaded(); + /** * @} */ diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h index b8c4ad9ad738..df65a7b20846 100644 --- a/include/llvm-c/Disassembler.h +++ b/include/llvm-c/Disassembler.h @@ -139,12 +139,25 @@ extern "C" { * by passing a block of information in the DisInfo parameter and specifying the * TagType and callback functions as described above. These can all be passed * as NULL. If successful, this returns a disassembler context. If not, it - * returns NULL. + * returns NULL. This function is equivalent to calling LLVMCreateDisasmCPU() + * with an empty CPU name. */ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp); +/** + * Create a disassembler for the TripleName and a specific CPU. Symbolic + * disassembly is supported by passing a block of information in the DisInfo + * parameter and specifying the TagType and callback functions as described + * above. These can all be passed as NULL. If successful, this returns a + * disassembler context. If not, it returns NULL. + */ +LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, + void *DisInfo, int TagType, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp); + /** * Set the disassembler's options. Returns 1 if it can set the Options and 0 * otherwise. @@ -153,6 +166,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DC, uint64_t Options); /* The option to produce marked up assembly. */ #define LLVMDisassembler_Option_UseMarkup 1 +/* The option to print immediates as hex. */ +#define LLVMDisassembler_Option_PrintImmHex 2 +/* The option to use the other assembler printer variant */ +#define LLVMDisassembler_Option_AsmPrinterVariant 4 /** * Dispose of a disassembler context.
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h deleted file mode 100644 index 71a0d496c028..000000000000 --- a/include/llvm-c/EnhancedDisassembly.h +++ /dev/null @@ -1,530 +0,0 @@ -/*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\ -|* *| -|* The LLVM Compiler Infrastructure *| -|* *| -|* This file is distributed under the University of Illinois Open Source *| -|* License. See LICENSE.TXT for details. *| -|* *| -|*===----------------------------------------------------------------------===*| -|* *| -|* This header declares the C interface to EnhancedDisassembly.so, which *| -|* implements a disassembler with the ability to extract operand values and *| -|* individual tokens from assembly instructions. *| -|* *| -|* The header declares additional interfaces if the host compiler supports *| -|* the blocks API. *| -|* *| -\*===----------------------------------------------------------------------===*/ - -#ifndef LLVM_C_ENHANCEDDISASSEMBLY_H -#define LLVM_C_ENHANCEDDISASSEMBLY_H - -#include "llvm/Support/DataTypes.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @defgroup LLVMCEnhancedDisassembly Enhanced Disassembly - * @ingroup LLVMC - * @deprecated - * - * This module contains an interface to the Enhanced Disassembly (edis) - * library. The edis library is deprecated and will likely disappear in - * the near future. You should use the @ref LLVMCDisassembler interface - * instead. - * - * @{ - */ - -/*! - @typedef EDByteReaderCallback - Interface to memory from which instructions may be read. - @param byte A pointer whose target should be filled in with the data returned. - @param address The address of the byte to be read. - @param arg An anonymous argument for client use. - @result 0 on success; -1 otherwise. - */ -typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg); - -/*! 
- @typedef EDRegisterReaderCallback - Interface to registers from which registers may be read. - @param value A pointer whose target should be filled in with the value of the - register. - @param regID The LLVM register identifier for the register to read. - @param arg An anonymous argument for client use. - @result 0 if the register could be read; -1 otherwise. - */ -typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, - void* arg); - -/*! - @typedef EDAssemblySyntax_t - An assembly syntax for use in tokenizing instructions. - */ -enum { -/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */ - kEDAssemblySyntaxX86Intel = 0, -/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */ - kEDAssemblySyntaxX86ATT = 1, - kEDAssemblySyntaxARMUAL = 2 -}; -typedef unsigned EDAssemblySyntax_t; - -/*! - @typedef EDDisassemblerRef - Encapsulates a disassembler for a single CPU architecture. - */ -typedef void *EDDisassemblerRef; - -/*! - @typedef EDInstRef - Encapsulates a single disassembled instruction in one assembly syntax. - */ -typedef void *EDInstRef; - -/*! - @typedef EDTokenRef - Encapsulates a token from the disassembly of an instruction. - */ -typedef void *EDTokenRef; - -/*! - @typedef EDOperandRef - Encapsulates an operand of an instruction. - */ -typedef void *EDOperandRef; - -/*! - @functiongroup Getting a disassembler - */ - -/*! - @function EDGetDisassembler - Gets the disassembler for a given target. - @param disassembler A pointer whose target will be filled in with the - disassembler. - @param triple Identifies the target. Example: "x86_64-apple-darwin10" - @param syntax The assembly syntax to use when decoding instructions. - @result 0 on success; -1 otherwise. - */ -int EDGetDisassembler(EDDisassemblerRef *disassembler, - const char *triple, - EDAssemblySyntax_t syntax); - -/*! - @functiongroup Generic architectural queries - */ - -/*! 
- @function EDGetRegisterName - Gets the human-readable name for a given register. - @param regName A pointer whose target will be pointed at the name of the - register. The name does not need to be deallocated and will be - @param disassembler The disassembler to query for the name. - @param regID The register identifier, as returned by EDRegisterTokenValue. - @result 0 on success; -1 otherwise. - */ -int EDGetRegisterName(const char** regName, - EDDisassemblerRef disassembler, - unsigned regID); - -/*! - @function EDRegisterIsStackPointer - Determines if a register is one of the platform's stack-pointer registers. - @param disassembler The disassembler to query. - @param regID The register identifier, as returned by EDRegisterTokenValue. - @result 1 if true; 0 otherwise. - */ -int EDRegisterIsStackPointer(EDDisassemblerRef disassembler, - unsigned regID); - -/*! - @function EDRegisterIsProgramCounter - Determines if a register is one of the platform's stack-pointer registers. - @param disassembler The disassembler to query. - @param regID The register identifier, as returned by EDRegisterTokenValue. - @result 1 if true; 0 otherwise. - */ -int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler, - unsigned regID); - -/*! - @functiongroup Creating and querying instructions - */ - -/*! - @function EDCreateInst - Gets a set of contiguous instructions from a disassembler. - @param insts A pointer to an array that will be filled in with the - instructions. Must have at least count entries. Entries not filled in will - be set to NULL. - @param count The maximum number of instructions to fill in. - @param disassembler The disassembler to use when decoding the instructions. - @param byteReader The function to use when reading the instruction's machine - code. - @param address The address of the first byte of the instruction. - @param arg An anonymous argument to be passed to byteReader. - @result The number of instructions read on success; 0 otherwise. 
- */ -unsigned int EDCreateInsts(EDInstRef *insts, - unsigned int count, - EDDisassemblerRef disassembler, - EDByteReaderCallback byteReader, - uint64_t address, - void *arg); - -/*! - @function EDReleaseInst - Frees the memory for an instruction. The instruction can no longer be accessed - after this call. - @param inst The instruction to be freed. - */ -void EDReleaseInst(EDInstRef inst); - -/*! - @function EDInstByteSize - @param inst The instruction to be queried. - @result The number of bytes in the instruction's machine-code representation. - */ -int EDInstByteSize(EDInstRef inst); - -/*! - @function EDGetInstString - Gets the disassembled text equivalent of the instruction. - @param buf A pointer whose target will be filled in with a pointer to the - string. (The string becomes invalid when the instruction is released.) - @param inst The instruction to be queried. - @result 0 on success; -1 otherwise. - */ -int EDGetInstString(const char **buf, - EDInstRef inst); - -/*! - @function EDInstID - @param instID A pointer whose target will be filled in with the LLVM identifier - for the instruction. - @param inst The instruction to be queried. - @result 0 on success; -1 otherwise. - */ -int EDInstID(unsigned *instID, EDInstRef inst); - -/*! - @function EDInstIsBranch - @param inst The instruction to be queried. - @result 1 if the instruction is a branch instruction; 0 if it is some other - type of instruction; -1 if there was an error. - */ -int EDInstIsBranch(EDInstRef inst); - -/*! - @function EDInstIsMove - @param inst The instruction to be queried. - @result 1 if the instruction is a move instruction; 0 if it is some other - type of instruction; -1 if there was an error. - */ -int EDInstIsMove(EDInstRef inst); - -/*! - @function EDBranchTargetID - @param inst The instruction to be queried. - @result The ID of the branch target operand, suitable for use with - EDCopyOperand. -1 if no such operand exists. - */ -int EDBranchTargetID(EDInstRef inst); - -/*! 
- @function EDMoveSourceID - @param inst The instruction to be queried. - @result The ID of the move source operand, suitable for use with - EDCopyOperand. -1 if no such operand exists. - */ -int EDMoveSourceID(EDInstRef inst); - -/*! - @function EDMoveTargetID - @param inst The instruction to be queried. - @result The ID of the move source operand, suitable for use with - EDCopyOperand. -1 if no such operand exists. - */ -int EDMoveTargetID(EDInstRef inst); - -/*! - @functiongroup Creating and querying tokens - */ - -/*! - @function EDNumTokens - @param inst The instruction to be queried. - @result The number of tokens in the instruction, or -1 on error. - */ -int EDNumTokens(EDInstRef inst); - -/*! - @function EDGetToken - Retrieves a token from an instruction. The token is valid until the - instruction is released. - @param token A pointer to be filled in with the token. - @param inst The instruction to be queried. - @param index The index of the token in the instruction. - @result 0 on success; -1 otherwise. - */ -int EDGetToken(EDTokenRef *token, - EDInstRef inst, - int index); - -/*! - @function EDGetTokenString - Gets the disassembled text for a token. - @param buf A pointer whose target will be filled in with a pointer to the - string. (The string becomes invalid when the token is released.) - @param token The token to be queried. - @result 0 on success; -1 otherwise. - */ -int EDGetTokenString(const char **buf, - EDTokenRef token); - -/*! - @function EDOperandIndexForToken - Returns the index of the operand to which a token belongs. - @param token The token to be queried. - @result The operand index on success; -1 otherwise - */ -int EDOperandIndexForToken(EDTokenRef token); - -/*! - @function EDTokenIsWhitespace - @param token The token to be queried. - @result 1 if the token is whitespace; 0 if not; -1 on error. - */ -int EDTokenIsWhitespace(EDTokenRef token); - -/*! - @function EDTokenIsPunctuation - @param token The token to be queried. 
- @result 1 if the token is punctuation; 0 if not; -1 on error. - */ -int EDTokenIsPunctuation(EDTokenRef token); - -/*! - @function EDTokenIsOpcode - @param token The token to be queried. - @result 1 if the token is opcode; 0 if not; -1 on error. - */ -int EDTokenIsOpcode(EDTokenRef token); - -/*! - @function EDTokenIsLiteral - @param token The token to be queried. - @result 1 if the token is a numeric literal; 0 if not; -1 on error. - */ -int EDTokenIsLiteral(EDTokenRef token); - -/*! - @function EDTokenIsRegister - @param token The token to be queried. - @result 1 if the token identifies a register; 0 if not; -1 on error. - */ -int EDTokenIsRegister(EDTokenRef token); - -/*! - @function EDTokenIsNegativeLiteral - @param token The token to be queried. - @result 1 if the token is a negative signed literal; 0 if not; -1 on error. - */ -int EDTokenIsNegativeLiteral(EDTokenRef token); - -/*! - @function EDLiteralTokenAbsoluteValue - @param value A pointer whose target will be filled in with the absolute value - of the literal. - @param token The token to be queried. - @result 0 on success; -1 otherwise. - */ -int EDLiteralTokenAbsoluteValue(uint64_t *value, - EDTokenRef token); - -/*! - @function EDRegisterTokenValue - @param registerID A pointer whose target will be filled in with the LLVM - register identifier for the token. - @param token The token to be queried. - @result 0 on success; -1 otherwise. - */ -int EDRegisterTokenValue(unsigned *registerID, - EDTokenRef token); - -/*! - @functiongroup Creating and querying operands - */ - -/*! - @function EDNumOperands - @param inst The instruction to be queried. - @result The number of operands in the instruction, or -1 on error. - */ -int EDNumOperands(EDInstRef inst); - -/*! - @function EDGetOperand - Retrieves an operand from an instruction. The operand is valid until the - instruction is released. - @param operand A pointer to be filled in with the operand. - @param inst The instruction to be queried. 
- @param index The index of the operand in the instruction. - @result 0 on success; -1 otherwise. - */ -int EDGetOperand(EDOperandRef *operand, - EDInstRef inst, - int index); - -/*! - @function EDOperandIsRegister - @param operand The operand to be queried. - @result 1 if the operand names a register; 0 if not; -1 on error. - */ -int EDOperandIsRegister(EDOperandRef operand); - -/*! - @function EDOperandIsImmediate - @param operand The operand to be queried. - @result 1 if the operand specifies an immediate value; 0 if not; -1 on error. - */ -int EDOperandIsImmediate(EDOperandRef operand); - -/*! - @function EDOperandIsMemory - @param operand The operand to be queried. - @result 1 if the operand specifies a location in memory; 0 if not; -1 on error. - */ -int EDOperandIsMemory(EDOperandRef operand); - -/*! - @function EDRegisterOperandValue - @param value A pointer whose target will be filled in with the LLVM register ID - of the register named by the operand. - @param operand The operand to be queried. - @result 0 on success; -1 otherwise. - */ -int EDRegisterOperandValue(unsigned *value, - EDOperandRef operand); - -/*! - @function EDImmediateOperandValue - @param value A pointer whose target will be filled in with the value of the - immediate. - @param operand The operand to be queried. - @result 0 on success; -1 otherwise. - */ -int EDImmediateOperandValue(uint64_t *value, - EDOperandRef operand); - -/*! - @function EDEvaluateOperand - Evaluates an operand using a client-supplied register state accessor. Register - operands are evaluated by reading the value of the register; immediate operands - are evaluated by reporting the immediate value; memory operands are evaluated - by computing the target address (with only those relocations applied that were - already applied to the original bytes). - @param result A pointer whose target is to be filled with the result of - evaluating the operand. - @param operand The operand to be evaluated. 
- @param regReader The function to use when reading registers from the register - state. - @param arg An anonymous argument for client use. - @result 0 if the operand could be evaluated; -1 otherwise. - */ -int EDEvaluateOperand(uint64_t *result, - EDOperandRef operand, - EDRegisterReaderCallback regReader, - void *arg); - -#ifdef __BLOCKS__ - -/*! - @typedef EDByteBlock_t - Block-based interface to memory from which instructions may be read. - @param byte A pointer whose target should be filled in with the data returned. - @param address The address of the byte to be read. - @result 0 on success; -1 otherwise. - */ -typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address); - -/*! - @typedef EDRegisterBlock_t - Block-based interface to registers from which registers may be read. - @param value A pointer whose target should be filled in with the value of the - register. - @param regID The LLVM register identifier for the register to read. - @result 0 if the register could be read; -1 otherwise. - */ -typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); - -/*! - @typedef EDTokenVisitor_t - Block-based handler for individual tokens. - @param token The current token being read. - @result 0 to continue; 1 to stop normally; -1 on error. - */ -typedef int (^EDTokenVisitor_t)(EDTokenRef token); - -/*! @functiongroup Block-based interfaces */ - -/*! - @function EDBlockCreateInsts - Gets a set of contiguous instructions from a disassembler, using a block to - read memory. - @param insts A pointer to an array that will be filled in with the - instructions. Must have at least count entries. Entries not filled in will - be set to NULL. - @param count The maximum number of instructions to fill in. - @param disassembler The disassembler to use when decoding the instructions. - @param byteBlock The block to use when reading the instruction's machine - code. - @param address The address of the first byte of the instruction. 
- @result The number of instructions read on success; 0 otherwise. - */ -unsigned int EDBlockCreateInsts(EDInstRef *insts, - int count, - EDDisassemblerRef disassembler, - EDByteBlock_t byteBlock, - uint64_t address); - -/*! - @function EDBlockEvaluateOperand - Evaluates an operand using a block to read registers. - @param result A pointer whose target is to be filled with the result of - evaluating the operand. - @param operand The operand to be evaluated. - @param regBlock The block to use when reading registers from the register - state. - @result 0 if the operand could be evaluated; -1 otherwise. - */ -int EDBlockEvaluateOperand(uint64_t *result, - EDOperandRef operand, - EDRegisterBlock_t regBlock); - -/*! - @function EDBlockVisitTokens - Visits every token with a visitor. - @param inst The instruction with the tokens to be visited. - @param visitor The visitor. - @result 0 if the visit ended normally; -1 if the visitor encountered an error - or there was some other error. - */ -int EDBlockVisitTokens(EDInstRef inst, - EDTokenVisitor_t visitor); - -/** - * @} - */ - -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h index cb3ab9e3f393..ada473818da6 100644 --- a/include/llvm-c/Initialization.h +++ b/include/llvm-c/Initialization.h @@ -34,6 +34,7 @@ extern "C" { void LLVMInitializeCore(LLVMPassRegistryRef R); void LLVMInitializeTransformUtils(LLVMPassRegistryRef R); void LLVMInitializeScalarOpts(LLVMPassRegistryRef R); +void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R); void LLVMInitializeVectorization(LLVMPassRegistryRef R); void LLVMInitializeInstCombine(LLVMPassRegistryRef R); void LLVMInitializeIPO(LLVMPassRegistryRef R); diff --git a/include/llvm-c/LinkTimeOptimizer.h b/include/llvm-c/LinkTimeOptimizer.h index 5338d3fc4c85..7a0fbf65bedb 100644 --- a/include/llvm-c/LinkTimeOptimizer.h +++ b/include/llvm-c/LinkTimeOptimizer.h @@ -13,8 +13,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef __LTO_CAPI_H__ -#define __LTO_CAPI_H__ +#ifndef LLVM_C_LINKTIMEOPTIMIZER_H +#define LLVM_C_LINKTIMEOPTIMIZER_H #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h index 29668de46529..691abdfcb47a 100644 --- a/include/llvm-c/TargetMachine.h +++ b/include/llvm-c/TargetMachine.h @@ -20,6 +20,7 @@ #define LLVM_C_TARGETMACHINE_H #include "llvm-c/Core.h" +#include "llvm-c/Target.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h index cee6e5a0ee08..82e513d4905d 100644 --- a/include/llvm-c/Transforms/PassManagerBuilder.h +++ b/include/llvm-c/Transforms/PassManagerBuilder.h @@ -11,8 +11,8 @@ |* *| \*===----------------------------------------------------------------------===*/ -#ifndef LLVM_C_PASSMANAGERBUILDER -#define LLVM_C_PASSMANAGERBUILDER +#ifndef LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H +#define LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H #include "llvm-c/Core.h" @@ -77,8 +77,8 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, /** See llvm::PassManagerBuilder::populateLTOPassManager. 
*/ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM, - bool Internalize, - bool RunInliner); + LLVMBool Internalize, + LLVMBool RunInliner); /** * @} diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index f43d365e3dbe..40110fddfc13 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -13,8 +13,8 @@ |* *| \*===----------------------------------------------------------------------===*/ -#ifndef LTO_H -#define LTO_H 1 +#ifndef LLVM_C_LTO_H +#define LLVM_C_LTO_H #include #include @@ -291,6 +291,13 @@ lto_codegen_compile_to_file(lto_code_gen_t cg, const char** name); extern void lto_codegen_debug_options(lto_code_gen_t cg, const char *); +/** + * Initializes LLVM disassemblers. + * FIXME: This doesn't really belong here. + */ +extern void +lto_initialize_disassembler(void); + #ifdef __cplusplus } #endif diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 31c6e6adbfc6..14bcaef6d165 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -97,8 +97,8 @@ nexttoward. */ -#ifndef LLVM_FLOAT_H -#define LLVM_FLOAT_H +#ifndef LLVM_ADT_APFLOAT_H +#define LLVM_ADT_APFLOAT_H // APInt contains static functions implementing bignum arithmetic. #include "llvm/ADT/APInt.h" @@ -184,9 +184,9 @@ namespace llvm { APFloat(const fltSemantics &, integerPart); APFloat(const fltSemantics &, fltCategory, bool negative); APFloat(const fltSemantics &, uninitializedTag); + APFloat(const fltSemantics &, const APInt &); explicit APFloat(double d); explicit APFloat(float f); - explicit APFloat(const APInt &, bool isIEEE = false); APFloat(const APFloat &); ~APFloat(); @@ -300,7 +300,7 @@ namespace llvm { /* The definition of equality is not straightforward for floating point, so we won't use operator==. Use one of the following, or write whatever it is you really mean. 
*/ - // bool operator==(const APFloat &) const; // DO NOT IMPLEMENT + bool operator==(const APFloat &) const LLVM_DELETED_FUNCTION; /* IEEE comparison with another floating point number (NaNs compare unordered, 0==-0). */ @@ -327,6 +327,7 @@ namespace llvm { bool isNegative() const { return sign; } bool isPosZero() const { return isZero() && !isNegative(); } bool isNegZero() const { return isZero() && isNegative(); } + bool isDenormal() const; APFloat& operator=(const APFloat &); @@ -422,7 +423,7 @@ namespace llvm { APInt convertQuadrupleAPFloatToAPInt() const; APInt convertF80LongDoubleAPFloatToAPInt() const; APInt convertPPCDoubleDoubleAPFloatToAPInt() const; - void initFromAPInt(const APInt& api, bool isIEEE = false); + void initFromAPInt(const fltSemantics *Sem, const APInt& api); void initFromHalfAPInt(const APInt& api); void initFromFloatAPInt(const APInt& api); void initFromDoubleAPInt(const APInt& api); @@ -462,4 +463,4 @@ namespace llvm { hash_code hash_value(const APFloat &Arg); } /* namespace llvm */ -#endif /* LLVM_FLOAT_H */ +#endif /* LLVM_ADT_APFLOAT_H */ diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index c7c8016b8339..3d8b72d9aaf4 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_APINT_H -#define LLVM_APINT_H +#ifndef LLVM_ADT_APINT_H +#define LLVM_ADT_APINT_H #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Compiler.h" @@ -274,7 +274,7 @@ public: initSlowCase(that); } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES /// @brief Move Constructor. APInt(APInt&& that) : BitWidth(that.BitWidth), VAL(that.VAL) { that.BitWidth = 0; @@ -427,7 +427,7 @@ public: /// @returns the all-ones value for an APInt of the specified bit-width. /// @brief Get the all-ones value. 
static APInt getAllOnesValue(unsigned numBits) { - return APInt(numBits, -1ULL, true); + return APInt(numBits, UINT64_MAX, true); } /// @returns the '0' value for an APInt of the specified bit-width. @@ -498,13 +498,24 @@ public: if (loBitsSet == 0) return APInt(numBits, 0); if (loBitsSet == APINT_BITS_PER_WORD) - return APInt(numBits, -1ULL); + return APInt(numBits, UINT64_MAX); // For small values, return quickly. if (loBitsSet <= APINT_BITS_PER_WORD) - return APInt(numBits, -1ULL >> (APINT_BITS_PER_WORD - loBitsSet)); + return APInt(numBits, UINT64_MAX >> (APINT_BITS_PER_WORD - loBitsSet)); return getAllOnesValue(numBits).lshr(numBits - loBitsSet); } + /// \brief Return a value containing V broadcasted over NewLen bits. + static APInt getSplat(unsigned NewLen, const APInt &V) { + assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); + + APInt Val = V.zextOrSelf(NewLen); + for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) + Val |= Val << I; + + return Val; + } + /// \brief Determine if two APInts have the same value, after zero-extending /// one of them (if needed!) to ensure that the bit-widths match. static bool isSameValue(const APInt &I1, const APInt &I2) { @@ -601,7 +612,7 @@ public: return AssignSlowCase(RHS); } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES /// @brief Move assignment operator. APInt& operator=(APInt&& that) { if (!isSingleWord()) @@ -799,16 +810,7 @@ public: /// Signed divide this APInt by APInt RHS. /// @brief Signed division function for APInt. - APInt sdiv(const APInt &RHS) const { - if (isNegative()) - if (RHS.isNegative()) - return (-(*this)).udiv(-RHS); - else - return -((-(*this)).udiv(RHS)); - else if (RHS.isNegative()) - return -(this->udiv(-RHS)); - return this->udiv(RHS); - } + APInt sdiv(const APInt &RHS) const; /// Perform an unsigned remainder operation on this APInt with RHS being the /// divisor. 
Both this and RHS are treated as unsigned quantities for purposes @@ -821,16 +823,7 @@ public: /// Signed remainder operation on APInt. /// @brief Function for signed remainder operation. - APInt srem(const APInt &RHS) const { - if (isNegative()) - if (RHS.isNegative()) - return -((-(*this)).urem(-RHS)); - else - return -((-(*this)).urem(RHS)); - else if (RHS.isNegative()) - return this->urem(-RHS); - return this->urem(RHS); - } + APInt srem(const APInt &RHS) const; /// Sometimes it is convenient to divide two APInt values and obtain both the /// quotient and remainder. This function does both operations in the same @@ -842,24 +835,9 @@ public: APInt &Quotient, APInt &Remainder); static void sdivrem(const APInt &LHS, const APInt &RHS, - APInt &Quotient, APInt &Remainder) { - if (LHS.isNegative()) { - if (RHS.isNegative()) - APInt::udivrem(-LHS, -RHS, Quotient, Remainder); - else { - APInt::udivrem(-LHS, RHS, Quotient, Remainder); - Quotient = -Quotient; - } - Remainder = -Remainder; - } else if (RHS.isNegative()) { - APInt::udivrem(LHS, -RHS, Quotient, Remainder); - Quotient = -Quotient; - } else { - APInt::udivrem(LHS, RHS, Quotient, Remainder); - } - } - - + APInt &Quotient, APInt &Remainder); + + // Operations that return overflow indicators. APInt sadd_ov(const APInt &RHS, bool &Overflow) const; APInt uadd_ov(const APInt &RHS, bool &Overflow) const; @@ -1113,11 +1091,11 @@ public: /// @brief Set every bit to 1. void setAllBits() { if (isSingleWord()) - VAL = -1ULL; + VAL = UINT64_MAX; else { // Set all the bits in all the words. for (unsigned i = 0; i < getNumWords(); ++i) - pVal[i] = -1ULL; + pVal[i] = UINT64_MAX; } // Clear the unused ones clearUnusedBits(); @@ -1142,10 +1120,10 @@ public: /// @brief Toggle every bit to its opposite value. 
void flipAllBits() { if (isSingleWord()) - VAL ^= -1ULL; + VAL ^= UINT64_MAX; else { for (unsigned i = 0; i < getNumWords(); ++i) - pVal[i] ^= -1ULL; + pVal[i] ^= UINT64_MAX; } clearUnusedBits(); } @@ -1191,7 +1169,8 @@ public: /// APInt. This is used in conjunction with getActiveData to extract the raw /// value of the APInt. unsigned getActiveWords() const { - return whichWord(getActiveBits()-1) + 1; + unsigned numActiveBits = getActiveBits(); + return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1; } /// Computes the minimum bit width for this APInt while considering it to be diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h index 048c65ce2c77..11be4c513e2c 100644 --- a/include/llvm/ADT/APSInt.h +++ b/include/llvm/ADT/APSInt.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_APSINT_H -#define LLVM_APSINT_H +#ifndef LLVM_ADT_APSINT_H +#define LLVM_ADT_APSINT_H #include "llvm/ADT/APInt.h" @@ -23,7 +23,7 @@ class APSInt : public APInt { bool IsUnsigned; public: /// Default constructor that creates an uninitialized APInt. - explicit APSInt() {} + explicit APSInt() : IsUnsigned(false) {} /// APSInt ctor - Create an APSInt with the specified width, default to /// unsigned. @@ -161,11 +161,11 @@ public: } APSInt& operator++() { - static_cast(*this)++; + ++(static_cast(*this)); return *this; } APSInt& operator--() { - static_cast(*this)--; + --(static_cast(*this)); return *this; } APSInt operator++(int) { diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index 1e35d6279219..c555c1c2b1d5 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -33,6 +33,8 @@ namespace llvm { typedef const T *const_iterator; typedef size_t size_type; + typedef std::reverse_iterator reverse_iterator; + private: /// The start of the array, in an external buffer. 
const T *Data; @@ -84,6 +86,9 @@ namespace llvm { iterator begin() const { return Data; } iterator end() const { return Data + Length; } + reverse_iterator rbegin() const { return reverse_iterator(end()); } + reverse_iterator rend() const { return reverse_iterator(begin()); } + /// empty - Check if the array is empty. bool empty() const { return Length == 0; } @@ -171,41 +176,41 @@ namespace llvm { /// Construct an empty ArrayRef. /*implicit*/ MutableArrayRef() : ArrayRef() {} - + /// Construct an MutableArrayRef from a single element. /*implicit*/ MutableArrayRef(T &OneElt) : ArrayRef(OneElt) {} - + /// Construct an MutableArrayRef from a pointer and length. /*implicit*/ MutableArrayRef(T *data, size_t length) : ArrayRef(data, length) {} - + /// Construct an MutableArrayRef from a range. MutableArrayRef(T *begin, T *end) : ArrayRef(begin, end) {} - + /// Construct an MutableArrayRef from a SmallVector. /*implicit*/ MutableArrayRef(SmallVectorImpl &Vec) : ArrayRef(Vec) {} - + /// Construct a MutableArrayRef from a std::vector. /*implicit*/ MutableArrayRef(std::vector &Vec) : ArrayRef(Vec) {} - + /// Construct an MutableArrayRef from a C array. template /*implicit*/ MutableArrayRef(T (&Arr)[N]) : ArrayRef(Arr) {} - + T *data() const { return const_cast(ArrayRef::data()); } iterator begin() const { return data(); } iterator end() const { return data() + this->size(); } - + /// front - Get the first element. T &front() const { assert(!this->empty()); return data()[0]; } - + /// back - Get the last element. T &back() const { assert(!this->empty()); @@ -217,14 +222,14 @@ namespace llvm { assert(N <= this->size() && "Invalid specifier"); return MutableArrayRef(data()+N, this->size()-N); } - + /// slice(n, m) - Chop off the first N elements of the array, and keep M /// elements in the array. 
MutableArrayRef slice(unsigned N, unsigned M) const { assert(N+M <= this->size() && "Invalid specifier"); return MutableArrayRef(data()+N, M); } - + /// @} /// @name Operator Overloads /// @{ @@ -301,5 +306,5 @@ namespace llvm { static const bool value = true; }; } - + #endif diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index 9d6388f7ee61..82cfdf437d4e 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -98,7 +98,7 @@ public: std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord)); } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES BitVector(BitVector &&RHS) : Bits(RHS.Bits), Size(RHS.Size), Capacity(RHS.Capacity) { RHS.Bits = 0; @@ -452,7 +452,7 @@ public: return *this; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES const BitVector &operator=(BitVector &&RHS) { if (this == &RHS) return *this; diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h index 2dfed075dea5..3dd862c8b220 100644 --- a/include/llvm/ADT/DAGDeltaAlgorithm.h +++ b/include/llvm/ADT/DAGDeltaAlgorithm.h @@ -9,8 +9,8 @@ #ifndef LLVM_ADT_DAGDELTAALGORITHM_H #define LLVM_ADT_DAGDELTAALGORITHM_H -#include #include +#include namespace llvm { diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h index 7bf7960c63a9..4d07e044781f 100644 --- a/include/llvm/ADT/DeltaAlgorithm.h +++ b/include/llvm/ADT/DeltaAlgorithm.h @@ -9,8 +9,8 @@ #ifndef LLVM_ADT_DELTAALGORITHM_H #define LLVM_ADT_DELTAALGORITHM_H -#include #include +#include namespace llvm { diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index ac4bdbd126c5..d41061996436 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -14,20 +14,20 @@ #ifndef LLVM_ADT_DENSEMAP_H #define LLVM_ADT_DENSEMAP_H -#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" #include 
"llvm/Support/MathExtras.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include "llvm/Support/type_traits.h" -#include "llvm/ADT/DenseMapInfo.h" #include -#include -#include -#include #include #include #include #include +#include +#include +#include namespace llvm { @@ -75,7 +75,7 @@ public: void clear() { if (getNumEntries() == 0 && getNumTombstones() == 0) return; - + // If the capacity of the array is huge, and the # elements used is small, // shrink the array. if (getNumEntries() * 4 < getNumBuckets() && getNumBuckets() > 64) { @@ -159,6 +159,24 @@ public: return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), true); } +#if LLVM_HAS_RVALUE_REFERENCES + // Inserts key,value pair into the map if the key isn't already in the map. + // If the key is already in the map, it returns false and doesn't update the + // value. + std::pair insert(std::pair &&KV) { + BucketT *TheBucket; + if (LookupBucketFor(KV.first, TheBucket)) + return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), + false); // Already in map. + + // Otherwise, insert the new element. + TheBucket = InsertIntoBucket(std::move(KV.first), + std::move(KV.second), + TheBucket); + return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), true); + } +#endif + /// insert - Range insertion of pairs. template void insert(InputIt I, InputIt E) { @@ -198,7 +216,7 @@ public: return FindAndConstruct(Key).second; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES value_type& FindAndConstruct(KeyT &&Key) { BucketT *TheBucket; if (LookupBucketFor(Key, TheBucket)) @@ -383,7 +401,7 @@ private: return TheBucket; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES BucketT *InsertIntoBucket(const KeyT &Key, ValueT &&Value, BucketT *TheBucket) { TheBucket = InsertIntoBucketImpl(Key, TheBucket); @@ -430,7 +448,8 @@ private: incrementNumEntries(); // If we are writing over a tombstone, remember this. 
- if (!KeyInfoT::isEqual(TheBucket->first, getEmptyKey())) + const KeyT EmptyKey = getEmptyKey(); + if (!KeyInfoT::isEqual(TheBucket->first, EmptyKey)) decrementNumTombstones(); return TheBucket; @@ -474,7 +493,6 @@ private: if (KeyInfoT::isEqual(ThisBucket->first, EmptyKey)) { // If we've already seen a tombstone while probing, fill it in instead // of the empty bucket we eventually probed to. - if (FoundTombstone) ThisBucket = FoundTombstone; FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket; return false; } @@ -531,13 +549,13 @@ public: init(NumInitBuckets); } - DenseMap(const DenseMap &other) { + DenseMap(const DenseMap &other) : BaseT() { init(0); copyFrom(other); } -#if LLVM_USE_RVALUE_REFERENCES - DenseMap(DenseMap &&other) { +#if LLVM_HAS_RVALUE_REFERENCES + DenseMap(DenseMap &&other) : BaseT() { init(0); swap(other); } @@ -566,7 +584,7 @@ public: return *this; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES DenseMap& operator=(DenseMap &&other) { this->destroyAll(); operator delete(Buckets); @@ -700,7 +718,7 @@ public: copyFrom(other); } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES SmallDenseMap(SmallDenseMap &&other) { init(0); swap(other); @@ -795,7 +813,7 @@ public: return *this; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES SmallDenseMap& operator=(SmallDenseMap &&other) { this->destroyAll(); deallocateBuckets(); @@ -1027,7 +1045,7 @@ private: ++Ptr; } }; - + template static inline size_t capacity_in_bytes(const DenseMap &X) { diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index 8ab9a33200c3..d699ad51ada4 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -32,8 +32,10 @@ public: bool empty() const { return TheMap.empty(); } unsigned size() const { return TheMap.size(); } + size_t getMemorySize() const { return TheMap.getMemorySize(); } - /// Grow the denseset so that it has at least Size buckets. 
Does not shrink + /// Grow the DenseSet so that it has at least Size buckets. Will not shrink + /// the Size of the set. void resize(size_t Size) { TheMap.resize(Size); } void clear() { diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h index 519b18052b6d..644544253ab7 100644 --- a/include/llvm/ADT/DepthFirstIterator.h +++ b/include/llvm/ADT/DepthFirstIterator.h @@ -34,8 +34,8 @@ #define LLVM_ADT_DEPTHFIRSTITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" #include #include diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h index 375d84abebdd..91794dea6981 100644 --- a/include/llvm/ADT/FoldingSet.h +++ b/include/llvm/ADT/FoldingSet.h @@ -16,9 +16,9 @@ #ifndef LLVM_ADT_FOLDINGSET_H #define LLVM_ADT_FOLDINGSET_H -#include "llvm/Support/DataTypes.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class APFloat; diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h index fa7ccb975e52..6793c6b9c205 100644 --- a/include/llvm/ADT/ImmutableIntervalMap.h +++ b/include/llvm/ADT/ImmutableIntervalMap.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H -#define LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H +#ifndef LLVM_ADT_IMMUTABLEINTERVALMAP_H +#define LLVM_ADT_IMMUTABLEINTERVALMAP_H #include "llvm/ADT/ImmutableMap.h" diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h index 20bdd903f7a5..7f0c239423bd 100644 --- a/include/llvm/ADT/ImmutableList.h +++ b/include/llvm/ADT/ImmutableList.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_IMLIST_H -#define LLVM_ADT_IMLIST_H +#ifndef LLVM_ADT_IMMUTABLELIST_H +#define 
LLVM_ADT_IMMUTABLELIST_H -#include "llvm/Support/Allocator.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/DataTypes.h" #include diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h index 4883c5ba0a6b..a667479a4d17 100644 --- a/include/llvm/ADT/ImmutableMap.h +++ b/include/llvm/ADT/ImmutableMap.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_IMMAP_H -#define LLVM_ADT_IMMAP_H +#ifndef LLVM_ADT_IMMUTABLEMAP_H +#define LLVM_ADT_IMMUTABLEMAP_H #include "llvm/ADT/ImmutableSet.h" @@ -211,17 +211,22 @@ public: friend class ImmutableMap; public: - value_type_ref operator*() const { return itr->getValue(); } - value_type* operator->() const { return &itr->getValue(); } + typedef typename ImmutableMap::value_type value_type; + typedef typename ImmutableMap::value_type_ref reference; + typedef typename iterator::value_type *pointer; + typedef std::bidirectional_iterator_tag iterator_category; + + typename iterator::reference operator*() const { return itr->getValue(); } + typename iterator::pointer operator->() const { return &itr->getValue(); } key_type_ref getKey() const { return itr->getValue().first; } data_type_ref getData() const { return itr->getValue().second; } - iterator& operator++() { ++itr; return *this; } iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; } iterator& operator--() { --itr; return *this; } iterator operator--(int) { iterator tmp(*this); --itr; return tmp; } + bool operator==(const iterator& RHS) const { return RHS.itr == itr; } bool operator!=(const iterator& RHS) const { return RHS.itr != itr; } }; @@ -288,6 +293,13 @@ public: Factory(F) { if (Root) { Root->retain(); } } + + explicit ImmutableMapRef(const ImmutableMap &X, + typename ImmutableMap::Factory &F) + : Root(X.getRootWithoutRetain()), + Factory(F.getTreeFactory()) { + if (Root) { Root->retain(); } + } ImmutableMapRef(const 
ImmutableMapRef &X) : Root(X.Root), @@ -318,12 +330,20 @@ public: return ImmutableMapRef(0, F); } - ImmutableMapRef add(key_type_ref K, data_type_ref D) { + void manualRetain() { + if (Root) Root->retain(); + } + + void manualRelease() { + if (Root) Root->release(); + } + + ImmutableMapRef add(key_type_ref K, data_type_ref D) const { TreeTy *NewT = Factory->add(Root, std::pair(K, D)); return ImmutableMapRef(NewT, Factory); } - ImmutableMapRef remove(key_type_ref K) { + ImmutableMapRef remove(key_type_ref K) const { TreeTy *NewT = Factory->remove(Root, K); return ImmutableMapRef(NewT, Factory); } diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h index 3900f96be16a..fbdf066e61ab 100644 --- a/include/llvm/ADT/ImmutableSet.h +++ b/include/llvm/ADT/ImmutableSet.h @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_IMSET_H -#define LLVM_ADT_IMSET_H +#ifndef LLVM_ADT_IMMUTABLESET_H +#define LLVM_ADT_IMMUTABLESET_H -#include "llvm/Support/Allocator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include @@ -1054,18 +1054,27 @@ public: class iterator { typename TreeTy::iterator itr; + + iterator() {} iterator(TreeTy* t) : itr(t) {} friend class ImmutableSet; + public: - iterator() {} - inline value_type_ref operator*() const { return itr->getValue(); } - inline iterator& operator++() { ++itr; return *this; } - inline iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; } - inline iterator& operator--() { --itr; return *this; } - inline iterator operator--(int) { iterator tmp(*this); --itr; return tmp; } - inline bool operator==(const iterator& RHS) const { return RHS.itr == itr; } - inline bool operator!=(const iterator& RHS) const { return RHS.itr != itr; } - inline value_type *operator->() const { return &(operator*()); } + 
typedef typename ImmutableSet::value_type value_type; + typedef typename ImmutableSet::value_type_ref reference; + typedef typename iterator::value_type *pointer; + typedef std::bidirectional_iterator_tag iterator_category; + + typename iterator::reference operator*() const { return itr->getValue(); } + typename iterator::pointer operator->() const { return &(operator*()); } + + iterator& operator++() { ++itr; return *this; } + iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; } + iterator& operator--() { --itr; return *this; } + iterator operator--(int) { iterator tmp(*this); --itr; return tmp; } + + bool operator==(const iterator& RHS) const { return RHS.itr == itr; } + bool operator!=(const iterator& RHS) const { return RHS.itr != itr; } }; iterator begin() const { return iterator(Root); } diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index 931b67e40911..c4083eed6a99 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -99,8 +99,8 @@ #ifndef LLVM_ADT_INTERVALMAP_H #define LLVM_ADT_INTERVALMAP_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/RecyclingAllocator.h" #include @@ -151,6 +151,26 @@ struct IntervalMapInfo { }; +template +struct IntervalMapHalfOpenInfo { + + /// startLess - Return true if x is not in [a;b). + static inline bool startLess(const T &x, const T &a) { + return x < a; + } + + /// stopLess - Return true if x is not in [a;b). + static inline bool stopLess(const T &b, const T &x) { + return b <= x; + } + + /// adjacent - Return true when the intervals [x;a) and [b;y) can coalesce. + static inline bool adjacent(const T &a, const T &b) { + return a == b; + } + +}; + /// IntervalMapImpl - Namespace used for IntervalMap implementation details. /// It should be considered private to the implementation. 
namespace IntervalMapImpl { diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h index a9724ee15447..b8b88619957e 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -18,8 +18,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_INTRUSIVE_REF_CNT_PTR -#define LLVM_ADT_INTRUSIVE_REF_CNT_PTR +#ifndef LLVM_ADT_INTRUSIVEREFCNTPTR_H +#define LLVM_ADT_INTRUSIVEREFCNTPTR_H #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" @@ -123,7 +123,7 @@ namespace llvm { retain(); } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES IntrusiveRefCntPtr(IntrusiveRefCntPtr&& S) : Obj(S.Obj) { S.Obj = 0; } @@ -226,13 +226,13 @@ namespace llvm { template struct simplify_type > { typedef T* SimpleType; - static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr& Val) { + static SimpleType getSimplifiedValue(IntrusiveRefCntPtr& Val) { return Val.getPtr(); } }; template struct simplify_type > { - typedef T* SimpleType; + typedef /*const*/ T* SimpleType; static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr& Val) { return Val.getPtr(); } @@ -240,4 +240,4 @@ namespace llvm { } // end namespace llvm -#endif // LLVM_ADT_INTRUSIVE_REF_CNT_PTR +#endif // LLVM_ADT_INTRUSIVEREFCNTPTR_H diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h index 6aacca5a6f0f..f6fcb0888de3 100644 --- a/include/llvm/ADT/MapVector.h +++ b/include/llvm/ADT/MapVector.h @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include namespace llvm { @@ -63,6 +64,11 @@ public: return Vector.empty(); } + std::pair &front() { return Vector.front(); } + const std::pair &front() const { return Vector.front(); } + std::pair &back() { return Vector.back(); } + const std::pair &back() const { return Vector.back(); } + void clear() { Map.clear(); Vector.clear(); @@ -79,10 +85,46 
@@ public: return Vector[I].second; } + ValueT lookup(const KeyT &Key) const { + typename MapType::const_iterator Pos = Map.find(Key); + return Pos == Map.end()? ValueT() : Vector[Pos->second].second; + } + + std::pair insert(const std::pair &KV) { + std::pair Pair = std::make_pair(KV.first, 0); + std::pair Result = Map.insert(Pair); + unsigned &I = Result.first->second; + if (Result.second) { + Vector.push_back(std::make_pair(KV.first, KV.second)); + I = Vector.size() - 1; + return std::make_pair(llvm::prior(end()), true); + } + return std::make_pair(begin() + I, false); + } + unsigned count(const KeyT &Key) const { typename MapType::const_iterator Pos = Map.find(Key); return Pos == Map.end()? 0 : 1; } + + iterator find(const KeyT &Key) { + typename MapType::const_iterator Pos = Map.find(Key); + return Pos == Map.end()? Vector.end() : + (Vector.begin() + Pos->second); + } + + const_iterator find(const KeyT &Key) const { + typename MapType::const_iterator Pos = Map.find(Key); + return Pos == Map.end()? Vector.end() : + (Vector.begin() + Pos->second); + } + + /// \brief Remove the last element from the vector. + void pop_back() { + typename MapType::iterator Pos = Map.find(Vector.back().first); + Map.erase(Pos); + Vector.pop_back(); + } }; } diff --git a/include/llvm/ADT/None.h b/include/llvm/ADT/None.h new file mode 100644 index 000000000000..5793bd2faef4 --- /dev/null +++ b/include/llvm/ADT/None.h @@ -0,0 +1,27 @@ +//===-- None.h - Simple null value for implicit construction ------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides None, an enumerator for use in implicit constructors +// of various (usually templated) types to make such construction more +// terse. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_NONE_H +#define LLVM_ADT_NONE_H + +namespace llvm { +/// \brief A simple null object to allow implicit construction of Optional +/// and similar types without having to spell out the specialization's name. +enum NoneType { + None +}; +} + +#endif diff --git a/include/llvm/ADT/NullablePtr.h b/include/llvm/ADT/NullablePtr.h index a9c47a138eca..8ddfd5d20abd 100644 --- a/include/llvm/ADT/NullablePtr.h +++ b/include/llvm/ADT/NullablePtr.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_NULLABLE_PTR_H -#define LLVM_ADT_NULLABLE_PTR_H +#ifndef LLVM_ADT_NULLABLEPTR_H +#define LLVM_ADT_NULLABLEPTR_H #include #include diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h index f43aeb1bc4d9..194e53fac213 100644 --- a/include/llvm/ADT/Optional.h +++ b/include/llvm/ADT/Optional.h @@ -13,13 +13,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_OPTIONAL -#define LLVM_ADT_OPTIONAL +#ifndef LLVM_ADT_OPTIONAL_H +#define LLVM_ADT_OPTIONAL_H +#include "llvm/ADT/None.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/AlignOf.h" #include -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES #include #endif @@ -27,54 +29,116 @@ namespace llvm { template class Optional { - T x; - unsigned hasVal : 1; + AlignedCharArrayUnion storage; + bool hasVal; public: - explicit Optional() : x(), hasVal(false) {} - Optional(const T &y) : x(y), hasVal(true) {} + Optional(NoneType) : hasVal(false) {} + explicit Optional() : hasVal(false) {} + Optional(const T &y) : hasVal(true) { + new (storage.buffer) T(y); + } + Optional(const Optional &O) : hasVal(O.hasVal) { + if (hasVal) + new (storage.buffer) T(*O); + } -#if LLVM_USE_RVALUE_REFERENCES - Optional(T &&y) : x(std::forward(y)), hasVal(true) {} +#if LLVM_HAS_RVALUE_REFERENCES + 
Optional(T &&y) : hasVal(true) { + new (storage.buffer) T(std::forward(y)); + } + Optional(Optional &&O) : hasVal(O) { + if (O) { + new (storage.buffer) T(std::move(*O)); + O.reset(); + } + } + Optional &operator=(T &&y) { + if (hasVal) + **this = std::move(y); + else { + new (storage.buffer) T(std::move(y)); + hasVal = true; + } + return *this; + } + Optional &operator=(Optional &&O) { + if (!O) + reset(); + else { + *this = std::move(*O); + O.reset(); + } + return *this; + } #endif static inline Optional create(const T* y) { return y ? Optional(*y) : Optional(); } + // FIXME: these assignments (& the equivalent const T&/const Optional& ctors) + // could be made more efficient by passing by value, possibly unifying them + // with the rvalue versions above - but this could place a different set of + // requirements (notably: the existence of a default ctor) when implemented + // in that way. Careful SFINAE to avoid such pitfalls would be required. Optional &operator=(const T &y) { - x = y; - hasVal = true; + if (hasVal) + **this = y; + else { + new (storage.buffer) T(y); + hasVal = true; + } return *this; } - - const T* getPointer() const { assert(hasVal); return &x; } - const T& getValue() const { assert(hasVal); return x; } - operator bool() const { return hasVal; } + Optional &operator=(const Optional &O) { + if (!O) + reset(); + else + *this = *O; + return *this; + } + + void reset() { + if (hasVal) { + (**this).~T(); + hasVal = false; + } + } + + ~Optional() { + reset(); + } + + const T* getPointer() const { assert(hasVal); return reinterpret_cast(storage.buffer); } + T* getPointer() { assert(hasVal); return reinterpret_cast(storage.buffer); } + const T& getValue() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } + T& getValue() LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } + + LLVM_EXPLICIT operator bool() const { return hasVal; } bool hasValue() const { return hasVal; } const T* operator->() const { return getPointer(); } 
- const T& operator*() const { assert(hasVal); return x; } + T* operator->() { return getPointer(); } + const T& operator*() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } + T& operator*() LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } + +#if LLVM_HAS_RVALUE_REFERENCE_THIS + T&& getValue() && { assert(hasVal); return std::move(*getPointer()); } + T&& operator*() && { assert(hasVal); return std::move(*getPointer()); } +#endif }; -template struct simplify_type; - -template -struct simplify_type > { - typedef const T* SimpleType; - static SimpleType getSimplifiedValue(const Optional &Val) { - return Val.getPointer(); - } +template struct isPodLike; +template struct isPodLike > { + // An Optional is pod-like if T is. + static const bool value = isPodLike::value; }; -template -struct simplify_type > - : public simplify_type > {}; - /// \brief Poison comparison between two \c Optional objects. Clients needs to /// explicitly compare the underlying values and account for empty \c Optional /// objects. /// -/// This routine will never be defined. It returns \c void to help diagnose +/// This routine will never be defined. It returns \c void to help diagnose /// errors at compile time. template void operator==(const Optional &X, const Optional &Y); @@ -83,7 +147,7 @@ void operator==(const Optional &X, const Optional &Y); /// explicitly compare the underlying values and account for empty \c Optional /// objects. /// -/// This routine will never be defined. It returns \c void to help diagnose +/// This routine will never be defined. It returns \c void to help diagnose /// errors at compile time. template void operator!=(const Optional &X, const Optional &Y); @@ -92,7 +156,7 @@ void operator!=(const Optional &X, const Optional &Y); /// explicitly compare the underlying values and account for empty \c Optional /// objects. /// -/// This routine will never be defined. 
It returns \c void to help diagnose +/// This routine will never be defined. It returns \c void to help diagnose /// errors at compile time. template void operator<(const Optional &X, const Optional &Y); @@ -101,7 +165,7 @@ void operator<(const Optional &X, const Optional &Y); /// explicitly compare the underlying values and account for empty \c Optional /// objects. /// -/// This routine will never be defined. It returns \c void to help diagnose +/// This routine will never be defined. It returns \c void to help diagnose /// errors at compile time. template void operator<=(const Optional &X, const Optional &Y); @@ -110,7 +174,7 @@ void operator<=(const Optional &X, const Optional &Y); /// explicitly compare the underlying values and account for empty \c Optional /// objects. /// -/// This routine will never be defined. It returns \c void to help diagnose +/// This routine will never be defined. It returns \c void to help diagnose /// errors at compile time. template void operator>=(const Optional &X, const Optional &Y); @@ -119,7 +183,7 @@ void operator>=(const Optional &X, const Optional &Y); /// explicitly compare the underlying values and account for empty \c Optional /// objects. /// -/// This routine will never be defined. It returns \c void to help diagnose +/// This routine will never be defined. It returns \c void to help diagnose /// errors at compile time. 
template void operator>(const Optional &X, const Optional &Y); diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h index 05bcd40d0862..86f9feee2cb4 100644 --- a/include/llvm/ADT/OwningPtr.h +++ b/include/llvm/ADT/OwningPtr.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_OWNING_PTR_H -#define LLVM_ADT_OWNING_PTR_H +#ifndef LLVM_ADT_OWNINGPTR_H +#define LLVM_ADT_OWNINGPTR_H #include "llvm/Support/Compiler.h" #include @@ -32,7 +32,7 @@ class OwningPtr { public: explicit OwningPtr(T *P = 0) : Ptr(P) {} -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES OwningPtr(OwningPtr &&Other) : Ptr(Other.take()) {} OwningPtr &operator=(OwningPtr &&Other) { @@ -95,7 +95,7 @@ class OwningArrayPtr { public: explicit OwningArrayPtr(T *P = 0) : Ptr(P) {} -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES OwningArrayPtr(OwningArrayPtr &&Other) : Ptr(Other.take()) {} OwningArrayPtr &operator=(OwningArrayPtr &&Other) { diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index 71c379bad5a4..cce2efb6ac99 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -57,11 +57,13 @@ class PointerIntPair { }; public: PointerIntPair() : Value(0) {} - PointerIntPair(PointerTy Ptr, IntType Int) : Value(0) { + PointerIntPair(PointerTy Ptr, IntType Int) { assert(IntBits <= PtrTraits::NumLowBitsAvailable && "PointerIntPair formed with integer size too large for pointer"); - setPointer(Ptr); - setInt(Int); + setPointerAndInt(Ptr, Int); + } + explicit PointerIntPair(PointerTy Ptr) { + initWithPointer(Ptr); } PointerTy getPointer() const { @@ -91,6 +93,25 @@ public: Value |= IntVal << IntShift; // Set new integer. 
} + void initWithPointer(PointerTy Ptr) { + intptr_t PtrVal + = reinterpret_cast(PtrTraits::getAsVoidPointer(Ptr)); + assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 && + "Pointer is not sufficiently aligned"); + Value = PtrVal; + } + + void setPointerAndInt(PointerTy Ptr, IntType Int) { + intptr_t PtrVal + = reinterpret_cast(PtrTraits::getAsVoidPointer(Ptr)); + assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 && + "Pointer is not sufficiently aligned"); + intptr_t IntVal = Int; + assert(IntVal < (1 << IntBits) && "Integer too large for field"); + + Value = PtrVal | (IntVal << IntShift); + } + PointerTy const *getAddrOfPointer() const { return const_cast(this)->getAddrOfPointer(); } diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index a9e86d22002d..f42515ac77a7 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -95,15 +95,11 @@ namespace llvm { public: PointerUnion() {} - PointerUnion(PT1 V) { - Val.setPointer( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(V))); - Val.setInt(0); + PointerUnion(PT1 V) : Val( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(V))) { } - PointerUnion(PT2 V) { - Val.setPointer( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(V))); - Val.setInt(1); + PointerUnion(PT2 V) : Val( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(V)), 1) { } /// isNull - Return true if the pointer held in the union is null, @@ -160,15 +156,14 @@ namespace llvm { /// Assignment operators - Allow assigning into this union from either /// pointer type, setting the discriminator to remember what it came from. 
const PointerUnion &operator=(const PT1 &RHS) { - Val.setPointer( + Val.initWithPointer( const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); - Val.setInt(0); return *this; } const PointerUnion &operator=(const PT2 &RHS) { - Val.setPointer( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); - Val.setInt(1); + Val.setPointerAndInt( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS)), + 1); return *this; } diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index 7f6350e4443e..59fa3f39c91e 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -260,7 +260,7 @@ class ReversePostOrderTraversal { typedef typename GT::NodeType NodeType; std::vector Blocks; // Block list in normal PO order inline void Initialize(NodeType *BB) { - copy(po_begin(BB), po_end(BB), back_inserter(Blocks)); + std::copy(po_begin(BB), po_end(BB), std::back_inserter(Blocks)); } public: typedef typename std::vector::reverse_iterator rpo_iterator; diff --git a/include/llvm/ADT/PriorityQueue.h b/include/llvm/ADT/PriorityQueue.h index bf8a68708163..827d0b346e59 100644 --- a/include/llvm/ADT/PriorityQueue.h +++ b/include/llvm/ADT/PriorityQueue.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_PRIORITY_QUEUE_H -#define LLVM_ADT_PRIORITY_QUEUE_H +#ifndef LLVM_ADT_PRIORITYQUEUE_H +#define LLVM_ADT_PRIORITYQUEUE_H #include #include diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h index 48436c667474..8ce4fd53bacd 100644 --- a/include/llvm/ADT/SCCIterator.h +++ b/include/llvm/ADT/SCCIterator.h @@ -21,8 +21,8 @@ #ifndef LLVM_ADT_SCCITERATOR_H #define LLVM_ADT_SCCITERATOR_H -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" #include namespace llvm { diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index aee500d4fb6c..dacda3652129 100644 --- 
a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -246,10 +246,10 @@ inline int array_pod_sort_comparator(const void *P1, const void *P2) { return 0; } -/// get_array_pad_sort_comparator - This is an internal helper function used to +/// get_array_pod_sort_comparator - This is an internal helper function used to /// get type deduction of T right. template -inline int (*get_array_pad_sort_comparator(const T &)) +inline int (*get_array_pod_sort_comparator(const T &)) (const void*, const void*) { return array_pod_sort_comparator; } @@ -274,7 +274,7 @@ inline void array_pod_sort(IteratorTy Start, IteratorTy End) { // Don't dereference start iterator of empty sequence. if (Start == End) return; qsort(&*Start, End-Start, sizeof(*Start), - get_array_pad_sort_comparator(*Start)); + get_array_pod_sort_comparator(*Start)); } template diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index a9cd54e13b38..652492a1538c 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -153,7 +153,7 @@ public: switchToLarge(new BitVector(*RHS.getPointer())); } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES SmallBitVector(SmallBitVector &&RHS) : X(RHS.X) { RHS.X = 1; } @@ -178,9 +178,9 @@ public: unsigned count() const { if (isSmall()) { uintptr_t Bits = getSmallBits(); - if (sizeof(uintptr_t) * CHAR_BIT == 32) + if (NumBaseBits == 32) return CountPopulation_32(Bits); - if (sizeof(uintptr_t) * CHAR_BIT == 64) + if (NumBaseBits == 64) return CountPopulation_64(Bits); llvm_unreachable("Unsupported!"); } @@ -215,9 +215,9 @@ public: uintptr_t Bits = getSmallBits(); if (Bits == 0) return -1; - if (sizeof(uintptr_t) * CHAR_BIT == 32) + if (NumBaseBits == 32) return CountTrailingZeros_32(Bits); - if (sizeof(uintptr_t) * CHAR_BIT == 64) + if (NumBaseBits == 64) return CountTrailingZeros_64(Bits); llvm_unreachable("Unsupported!"); } @@ -233,9 +233,9 @@ public: Bits &= ~uintptr_t(0) << (Prev + 1); if 
(Bits == 0 || Prev + 1 >= getSmallSize()) return -1; - if (sizeof(uintptr_t) * CHAR_BIT == 32) + if (NumBaseBits == 32) return CountTrailingZeros_32(Bits); - if (sizeof(uintptr_t) * CHAR_BIT == 64) + if (NumBaseBits == 64) return CountTrailingZeros_64(Bits); llvm_unreachable("Unsupported!"); } @@ -472,7 +472,7 @@ public: return *this; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES const SmallBitVector &operator=(SmallBitVector &&RHS) { if (this != &RHS) { clear(); diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 3bb883088c59..8c7304197f34 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -54,8 +54,6 @@ protected: /// then the set is in 'small mode'. const void **CurArray; /// CurArraySize - The allocated size of CurArray, always a power of two. - /// Note that CurArray points to an array that has CurArraySize+1 elements in - /// it, so that the end iterator actually points to valid memory. unsigned CurArraySize; // If small, this is # elts allocated consecutively @@ -68,9 +66,6 @@ protected: SmallArray(SmallStorage), CurArray(SmallStorage), CurArraySize(SmallSize) { assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 && "Initial size must be a power of two!"); - // The end pointer, always valid, is set to a valid element to help the - // iterator. - CurArray[SmallSize] = 0; clear(); } ~SmallPtrSetImpl(); @@ -147,9 +142,11 @@ protected: class SmallPtrSetIteratorImpl { protected: const void *const *Bucket; + const void *const *End; public: - explicit SmallPtrSetIteratorImpl(const void *const *BP) : Bucket(BP) { - AdvanceIfNotValid(); + explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E) + : Bucket(BP), End(E) { + AdvanceIfNotValid(); } bool operator==(const SmallPtrSetIteratorImpl &RHS) const { @@ -164,8 +161,10 @@ protected: /// that is. This is guaranteed to stop because the end() bucket is marked /// valid. 
void AdvanceIfNotValid() { - while (*Bucket == SmallPtrSetImpl::getEmptyMarker() || - *Bucket == SmallPtrSetImpl::getTombstoneMarker()) + assert(Bucket <= End); + while (Bucket != End && + (*Bucket == SmallPtrSetImpl::getEmptyMarker() || + *Bucket == SmallPtrSetImpl::getTombstoneMarker())) ++Bucket; } }; @@ -182,12 +181,13 @@ public: typedef std::ptrdiff_t difference_type; typedef std::forward_iterator_tag iterator_category; - explicit SmallPtrSetIterator(const void *const *BP) - : SmallPtrSetIteratorImpl(BP) {} + explicit SmallPtrSetIterator(const void *const *BP, const void *const *E) + : SmallPtrSetIteratorImpl(BP, E) {} // Most methods provided by baseclass. const PtrTy operator*() const { + assert(Bucket < End); return PtrTraits::getFromVoidPointer(const_cast(*Bucket)); } @@ -236,9 +236,8 @@ template class SmallPtrSet : public SmallPtrSetImpl { // Make sure that SmallSize is a power of two, round up if not. enum { SmallSizePowTwo = RoundUpToPowerOfTwo::Val }; - /// SmallStorage - Fixed size storage used in 'small mode'. The extra element - /// ensures that the end iterator actually points to valid memory. - const void *SmallStorage[SmallSizePowTwo+1]; + /// SmallStorage - Fixed size storage used in 'small mode'. 
+ const void *SmallStorage[SmallSizePowTwo]; typedef PointerLikeTypeTraits PtrTraits; public: SmallPtrSet() : SmallPtrSetImpl(SmallStorage, SmallSizePowTwo) {} @@ -275,10 +274,10 @@ public: typedef SmallPtrSetIterator iterator; typedef SmallPtrSetIterator const_iterator; inline iterator begin() const { - return iterator(CurArray); + return iterator(CurArray, CurArray+CurArraySize); } inline iterator end() const { - return iterator(CurArray+CurArraySize); + return iterator(CurArray+CurArraySize, CurArray+CurArraySize); } // Allow assignment from any smallptrset with the same element type even if it diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h index cd117f59ba76..5dfe924f6d78 100644 --- a/include/llvm/ADT/SmallSet.h +++ b/include/llvm/ADT/SmallSet.h @@ -14,8 +14,8 @@ #ifndef LLVM_ADT_SMALLSET_H #define LLVM_ADT_SMALLSET_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include namespace llvm { @@ -55,6 +55,7 @@ public: } /// insert - Insert an element into the set if it isn't already there. + /// Returns true if the element is inserted (it was not in the set before). 
bool insert(const T &V) { if (!isSmall()) return Set.insert(V).second; diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h index 8da99d1c125c..2cfb5b9f2a9d 100644 --- a/include/llvm/ADT/SmallString.h +++ b/include/llvm/ADT/SmallString.h @@ -77,7 +77,7 @@ public: void append(in_iter S, in_iter E) { SmallVectorImpl::append(S, E); } - + void append(size_t NumInputs, char Elt) { SmallVectorImpl::append(NumInputs, Elt); } diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index 6e0fd94dfe67..7ba0a714bfc7 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -16,6 +16,7 @@ #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/type_traits.h" #include #include @@ -145,16 +146,20 @@ public: } reference front() { + assert(!empty()); return begin()[0]; } const_reference front() const { + assert(!empty()); return begin()[0]; } reference back() { + assert(!empty()); return end()[-1]; } const_reference back() const { + assert(!empty()); return end()[-1]; } }; @@ -178,7 +183,7 @@ protected: /// std::move, but not all stdlibs actually provide that. template static It2 move(It1 I, It1 E, It2 Dest) { -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES for (; I != E; ++I, ++Dest) *Dest = ::std::move(*I); return Dest; @@ -193,7 +198,7 @@ protected: /// std::move_backward, but not all stdlibs actually provide that. template static It2 move_backward(It1 I, It1 E, It2 Dest) { -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES while (I != E) *--Dest = ::std::move(*--E); return Dest; @@ -206,7 +211,7 @@ protected: /// memory starting with "Dest", constructing elements as needed. 
template static void uninitialized_move(It1 I, It1 E, It2 Dest) { -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES for (; I != E; ++I, ++Dest) ::new ((void*) &*Dest) T(::std::move(*I)); #else @@ -239,7 +244,7 @@ public: goto Retry; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES void push_back(T &&Elt) { if (this->EndX < this->CapacityX) { Retry: @@ -263,7 +268,8 @@ template void SmallVectorTemplateBase::grow(size_t MinSize) { size_t CurCapacity = this->capacity(); size_t CurSize = this->size(); - size_t NewCapacity = 2*CurCapacity + 1; // Always grow, even from zero. + // Always grow, even from zero. + size_t NewCapacity = size_t(NextPowerOf2(CurCapacity+2)); if (NewCapacity < MinSize) NewCapacity = MinSize; T *NewElts = static_cast(malloc(NewCapacity*sizeof(T))); @@ -365,7 +371,7 @@ template class SmallVectorImpl : public SmallVectorTemplateBase::value> { typedef SmallVectorTemplateBase::value > SuperClass; - SmallVectorImpl(const SmallVectorImpl&); // DISABLED. + SmallVectorImpl(const SmallVectorImpl&) LLVM_DELETED_FUNCTION; public: typedef typename SuperClass::iterator iterator; typedef typename SuperClass::size_type size_type; @@ -422,7 +428,7 @@ public: } T pop_back_val() { -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES T Result = ::std::move(this->back()); #else T Result = this->back(); @@ -495,7 +501,7 @@ public: return(N); } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES iterator insert(iterator I, T &&Elt) { if (I == this->end()) { // Important special case for empty vector. 
this->push_back(::std::move(Elt)); @@ -667,7 +673,7 @@ public: SmallVectorImpl &operator=(const SmallVectorImpl &RHS); -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES SmallVectorImpl &operator=(SmallVectorImpl &&RHS); #endif @@ -787,7 +793,7 @@ SmallVectorImpl &SmallVectorImpl:: return *this; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES template SmallVectorImpl &SmallVectorImpl::operator=(SmallVectorImpl &&RHS) { // Avoid self-assignment. @@ -898,7 +904,7 @@ public: return *this; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES SmallVector(SmallVector &&RHS) : SmallVectorImpl(N) { if (!RHS.empty()) SmallVectorImpl::operator=(::std::move(RHS)); diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h new file mode 100644 index 000000000000..7f2a6f7d0bac --- /dev/null +++ b/include/llvm/ADT/SparseMultiSet.h @@ -0,0 +1,526 @@ +//===--- llvm/ADT/SparseMultiSet.h - Sparse multiset ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SparseMultiSet class, which adds multiset behavior to +// the SparseSet. +// +// A sparse multiset holds a small number of objects identified by integer keys +// from a moderately sized universe. The sparse multiset uses more memory than +// other containers in order to provide faster operations. Any key can map to +// multiple values. A SparseMultiSetNode class is provided, which serves as a +// convenient base class for the contents of a SparseMultiSet. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_SPARSEMULTISET_H +#define LLVM_ADT_SPARSEMULTISET_H + +#include "llvm/ADT/SparseSet.h" + +namespace llvm { + +/// Fast multiset implementation for objects that can be identified by small +/// unsigned keys. +/// +/// SparseMultiSet allocates memory proportional to the size of the key +/// universe, so it is not recommended for building composite data structures. +/// It is useful for algorithms that require a single set with fast operations. +/// +/// Compared to DenseSet and DenseMap, SparseMultiSet provides constant-time +/// fast clear() as fast as a vector. The find(), insert(), and erase() +/// operations are all constant time, and typically faster than a hash table. +/// The iteration order doesn't depend on numerical key values, it only depends +/// on the order of insert() and erase() operations. Iteration order is the +/// insertion order. Iteration is only provided over elements of equivalent +/// keys, but iterators are bidirectional. +/// +/// Compared to BitVector, SparseMultiSet uses 8x-40x more memory, but +/// offers constant-time clear() and size() operations as well as fast iteration +/// independent on the size of the universe. +/// +/// SparseMultiSet contains a dense vector holding all the objects and a sparse +/// array holding indexes into the dense vector. Most of the memory is used by +/// the sparse array which is the size of the key universe. The SparseT template +/// parameter provides a space/speed tradeoff for sets holding many elements. +/// +/// When SparseT is uint32_t, find() only touches up to 3 cache lines, but the +/// sparse array uses 4 x Universe bytes. +/// +/// When SparseT is uint8_t (the default), find() touches up to 3+[N/256] cache +/// lines, but the sparse array is 4x smaller. N is the number of elements in +/// the set. 
+/// +/// For sets that may grow to thousands of elements, SparseT should be set to +/// uint16_t or uint32_t. +/// +/// Multiset behavior is provided by providing doubly linked lists for values +/// that are inlined in the dense vector. SparseMultiSet is a good choice when +/// one desires a growable number of entries per key, as it will retain the +/// SparseSet algorithmic properties despite being growable. Thus, it is often a +/// better choice than a SparseSet of growable containers or a vector of +/// vectors. SparseMultiSet also keeps iterators valid after erasure (provided +/// the iterators don't point to the element erased), allowing for more +/// intuitive and fast removal. +/// +/// @tparam ValueT The type of objects in the set. +/// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT. +/// @tparam SparseT An unsigned integer type. See above. +/// +template, + typename SparseT = uint8_t> +class SparseMultiSet { + /// The actual data that's stored, as a doubly-linked list implemented via + /// indices into the DenseVector. The doubly linked list is implemented + /// circular in Prev indices, and INVALID-terminated in Next indices. This + /// provides efficient access to list tails. These nodes can also be + /// tombstones, in which case they are actually nodes in a single-linked + /// freelist of recyclable slots. + struct SMSNode { + static const unsigned INVALID = ~0U; + + ValueT Data; + unsigned Prev; + unsigned Next; + + SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) { } + + /// List tails have invalid Nexts. + bool isTail() const { + return Next == INVALID; + } + + /// Whether this node is a tombstone node, and thus is in our freelist. + bool isTombstone() const { + return Prev == INVALID; + } + + /// Since the list is circular in Prev, all non-tombstone nodes have a valid + /// Prev. 
+ bool isValid() const { return Prev != INVALID; } + }; + + typedef typename KeyFunctorT::argument_type KeyT; + typedef SmallVector DenseT; + DenseT Dense; + SparseT *Sparse; + unsigned Universe; + KeyFunctorT KeyIndexOf; + SparseSetValFunctor ValIndexOf; + + /// We have a built-in recycler for reusing tombstone slots. This recycler + /// puts a singly-linked free list into tombstone slots, allowing us quick + /// erasure, iterator preservation, and dense size. + unsigned FreelistIdx; + unsigned NumFree; + + unsigned sparseIndex(const ValueT &Val) const { + assert(ValIndexOf(Val) < Universe && + "Invalid key in set. Did object mutate?"); + return ValIndexOf(Val); + } + unsigned sparseIndex(const SMSNode &N) const { return sparseIndex(N.Data); } + + // Disable copy construction and assignment. + // This data structure is not meant to be used that way. + SparseMultiSet(const SparseMultiSet&) LLVM_DELETED_FUNCTION; + SparseMultiSet &operator=(const SparseMultiSet&) LLVM_DELETED_FUNCTION; + + /// Whether the given entry is the head of the list. List heads' previous + /// pointers are to the tail of the list, allowing for efficient access to the + /// list tail. D must be a valid entry node. + bool isHead(const SMSNode &D) const { + assert(D.isValid() && "Invalid node for head"); + return Dense[D.Prev].isTail(); + } + + /// Whether the given entry is a singleton entry, i.e. the only entry with + /// that key. + bool isSingleton(const SMSNode &N) const { + assert(N.isValid() && "Invalid node for singleton"); + // Is N its own predecessor? + return &Dense[N.Prev] == &N; + } + + /// Add in the given SMSNode. Uses a free entry in our freelist if + /// available. Returns the index of the added node.
+ unsigned addValue(const ValueT& V, unsigned Prev, unsigned Next) { + if (NumFree == 0) { + Dense.push_back(SMSNode(V, Prev, Next)); + return Dense.size() - 1; + } + + // Peel off a free slot + unsigned Idx = FreelistIdx; + unsigned NextFree = Dense[Idx].Next; + assert(Dense[Idx].isTombstone() && "Non-tombstone free?"); + + Dense[Idx] = SMSNode(V, Prev, Next); + FreelistIdx = NextFree; + --NumFree; + return Idx; + } + + /// Make the current index a new tombstone. Pushes it onto the freelist. + void makeTombstone(unsigned Idx) { + Dense[Idx].Prev = SMSNode::INVALID; + Dense[Idx].Next = FreelistIdx; + FreelistIdx = Idx; + ++NumFree; + } + +public: + typedef ValueT value_type; + typedef ValueT &reference; + typedef const ValueT &const_reference; + typedef ValueT *pointer; + typedef const ValueT *const_pointer; + + SparseMultiSet() + : Sparse(0), Universe(0), FreelistIdx(SMSNode::INVALID), NumFree(0) { } + + ~SparseMultiSet() { free(Sparse); } + + /// Set the universe size which determines the largest key the set can hold. + /// The universe must be sized before any elements can be added. + /// + /// @param U Universe size. All object keys must be less than U. + /// + void setUniverse(unsigned U) { + // It's not hard to resize the universe on a non-empty set, but it doesn't + // seem like a likely use case, so we can add that code when we need it. + assert(empty() && "Can only resize universe on an empty map"); + // Hysteresis prevents needless reallocations. + if (U >= Universe/4 && U <= Universe) + return; + free(Sparse); + // The Sparse array doesn't actually need to be initialized, so malloc + // would be enough here, but that will cause tools like valgrind to + // complain about branching on uninitialized data. + Sparse = reinterpret_cast(calloc(U, sizeof(SparseT))); + Universe = U; + } + + /// Our iterators are iterators over the collection of objects that share a + /// key. 
+ template + class iterator_base : public std::iterator { + friend class SparseMultiSet; + SMSPtrTy SMS; + unsigned Idx; + unsigned SparseIdx; + + iterator_base(SMSPtrTy P, unsigned I, unsigned SI) + : SMS(P), Idx(I), SparseIdx(SI) { } + + /// Whether our iterator has fallen outside our dense vector. + bool isEnd() const { + if (Idx == SMSNode::INVALID) + return true; + + assert(Idx < SMS->Dense.size() && "Out of range, non-INVALID Idx?"); + return false; + } + + /// Whether our iterator is properly keyed, i.e. the SparseIdx is valid + bool isKeyed() const { return SparseIdx < SMS->Universe; } + + unsigned Prev() const { return SMS->Dense[Idx].Prev; } + unsigned Next() const { return SMS->Dense[Idx].Next; } + + void setPrev(unsigned P) { SMS->Dense[Idx].Prev = P; } + void setNext(unsigned N) { SMS->Dense[Idx].Next = N; } + + public: + typedef std::iterator super; + typedef typename super::value_type value_type; + typedef typename super::difference_type difference_type; + typedef typename super::pointer pointer; + typedef typename super::reference reference; + + iterator_base(const iterator_base &RHS) + : SMS(RHS.SMS), Idx(RHS.Idx), SparseIdx(RHS.SparseIdx) { } + + const iterator_base &operator=(const iterator_base &RHS) { + SMS = RHS.SMS; + Idx = RHS.Idx; + SparseIdx = RHS.SparseIdx; + return *this; + } + + reference operator*() const { + assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx && + "Dereferencing iterator of invalid key or index"); + + return SMS->Dense[Idx].Data; + } + pointer operator->() const { return &operator*(); } + + /// Comparison operators + bool operator==(const iterator_base &RHS) const { + // end compares equal + if (SMS == RHS.SMS && Idx == RHS.Idx) { + assert((isEnd() || SparseIdx == RHS.SparseIdx) && + "Same dense entry, but different keys?"); + return true; + } + + return false; + } + + bool operator!=(const iterator_base &RHS) const { + return !operator==(RHS); + } + + /// Increment and decrement operators + 
iterator_base &operator--() { // predecrement - Back up + assert(isKeyed() && "Decrementing an invalid iterator"); + assert((isEnd() || !SMS->isHead(SMS->Dense[Idx])) && + "Decrementing head of list"); + + // If we're at the end, then issue a new find() + if (isEnd()) + Idx = SMS->findIndex(SparseIdx).Prev(); + else + Idx = Prev(); + + return *this; + } + iterator_base &operator++() { // preincrement - Advance + assert(!isEnd() && isKeyed() && "Incrementing an invalid/end iterator"); + Idx = Next(); + return *this; + } + iterator_base operator--(int) { // postdecrement + iterator_base I(*this); + --*this; + return I; + } + iterator_base operator++(int) { // postincrement + iterator_base I(*this); + ++*this; + return I; + } + }; + typedef iterator_base iterator; + typedef iterator_base const_iterator; + + // Convenience types + typedef std::pair RangePair; + + /// Returns an iterator past this container. Note that such an iterator cannot + /// be decremented, but will compare equal to other end iterators. + iterator end() { return iterator(this, SMSNode::INVALID, SMSNode::INVALID); } + const_iterator end() const { + return const_iterator(this, SMSNode::INVALID, SMSNode::INVALID); + } + + /// Returns true if the set is empty. + /// + /// This is not the same as BitVector::empty(). + /// + bool empty() const { return size() == 0; } + + /// Returns the number of elements in the set. + /// + /// This is not the same as BitVector::size() which returns the size of the + /// universe. + /// + unsigned size() const { + assert(NumFree <= Dense.size() && "Out-of-bounds free entries"); + return Dense.size() - NumFree; + } + + /// Clears the set. This is a very fast constant time operation. + /// + void clear() { + // Sparse does not need to be cleared, see find(). + Dense.clear(); + NumFree = 0; + FreelistIdx = SMSNode::INVALID; + } + + /// Find an element by its index. + /// + /// @param Idx A valid index to find. 
+ /// @returns An iterator to the element identified by key, or end(). + /// + iterator findIndex(unsigned Idx) { + assert(Idx < Universe && "Key out of range"); + assert(std::numeric_limits::is_integer && + !std::numeric_limits::is_signed && + "SparseT must be an unsigned integer type"); + const unsigned Stride = std::numeric_limits::max() + 1u; + for (unsigned i = Sparse[Idx], e = Dense.size(); i < e; i += Stride) { + const unsigned FoundIdx = sparseIndex(Dense[i]); + // Check that we're pointing at the correct entry and that it is the head + // of a valid list. + if (Idx == FoundIdx && Dense[i].isValid() && isHead(Dense[i])) + return iterator(this, i, Idx); + // Stride is 0 when SparseT >= unsigned. We don't need to loop. + if (!Stride) + break; + } + return end(); + } + + /// Find an element by its key. + /// + /// @param Key A valid key to find. + /// @returns An iterator to the element identified by key, or end(). + /// + iterator find(const KeyT &Key) { + return findIndex(KeyIndexOf(Key)); + } + + const_iterator find(const KeyT &Key) const { + iterator I = const_cast(this)->findIndex(KeyIndexOf(Key)); + return const_iterator(I.SMS, I.Idx, KeyIndexOf(Key)); + } + + /// Returns the number of elements identified by Key. This will be linear in + /// the number of elements of that key. + unsigned count(const KeyT &Key) const { + unsigned Ret = 0; + for (const_iterator It = find(Key); It != end(); ++It) + ++Ret; + + return Ret; + } + + /// Returns true if this set contains an element identified by Key. + bool contains(const KeyT &Key) const { + return find(Key) != end(); + } + + /// Return the head and tail of the subset's list, otherwise returns end(). + iterator getHead(const KeyT &Key) { return find(Key); } + iterator getTail(const KeyT &Key) { + iterator I = find(Key); + if (I != end()) + I = iterator(this, I.Prev(), KeyIndexOf(Key)); + return I; + } + + /// The bounds of the range of items sharing Key K. 
First member is the head + /// of the list, and the second member is a decrementable end iterator for + /// that key. + RangePair equal_range(const KeyT &K) { + iterator B = find(K); + iterator E = iterator(this, SMSNode::INVALID, B.SparseIdx); + return make_pair(B, E); + } + + /// Insert a new element at the tail of the subset list. Returns an iterator + /// to the newly added entry. + iterator insert(const ValueT &Val) { + unsigned Idx = sparseIndex(Val); + iterator I = findIndex(Idx); + + unsigned NodeIdx = addValue(Val, SMSNode::INVALID, SMSNode::INVALID); + + if (I == end()) { + // Make a singleton list + Sparse[Idx] = NodeIdx; + Dense[NodeIdx].Prev = NodeIdx; + return iterator(this, NodeIdx, Idx); + } + + // Stick it at the end. + unsigned HeadIdx = I.Idx; + unsigned TailIdx = I.Prev(); + Dense[TailIdx].Next = NodeIdx; + Dense[HeadIdx].Prev = NodeIdx; + Dense[NodeIdx].Prev = TailIdx; + + return iterator(this, NodeIdx, Idx); + } + + /// Erases an existing element identified by a valid iterator. + /// + /// This invalidates iterators pointing at the same entry, but erase() returns + /// an iterator pointing to the next element in the subset's list. This makes + /// it possible to erase selected elements while iterating over the subset: + /// + /// tie(I, E) = Set.equal_range(Key); + /// while (I != E) + /// if (test(*I)) + /// I = Set.erase(I); + /// else + /// ++I; + /// + /// Note that if the last element in the subset list is erased, this will + /// return an end iterator which can be decremented to get the new tail (if it + /// exists): + /// + /// tie(B, I) = Set.equal_range(Key); + /// for (bool isBegin = B == I; !isBegin; /* empty */) { + /// isBegin = (--I) == B; + /// if (test(I)) + /// break; + /// I = erase(I); + /// } + iterator erase(iterator I) { + assert(I.isKeyed() && !I.isEnd() && !Dense[I.Idx].isTombstone() && + "erasing invalid/end/tombstone iterator"); + + // First, unlink the node from its list. 
Then leave a tombstone in its + // dense vector slot so it can be recycled + iterator NextI = unlink(Dense[I.Idx]); + + // Put in a tombstone. + makeTombstone(I.Idx); + + return NextI; + } + + /// Erase all elements with the given key. This invalidates all + /// iterators of that key. + void eraseAll(const KeyT &K) { + for (iterator I = find(K); I != end(); /* empty */) + I = erase(I); + } + +private: + /// Unlink the node from its list. Returns the next node in the list. + iterator unlink(const SMSNode &N) { + if (isSingleton(N)) { + // Singleton is already unlinked + assert(N.Next == SMSNode::INVALID && "Singleton has next?"); + return iterator(this, SMSNode::INVALID, ValIndexOf(N.Data)); + } + + if (isHead(N)) { + // If we're the head, then update the sparse array and our next. + Sparse[sparseIndex(N)] = N.Next; + Dense[N.Next].Prev = N.Prev; + return iterator(this, N.Next, ValIndexOf(N.Data)); + } + + if (N.isTail()) { + // If we're the tail, then update our head and our previous. + findIndex(sparseIndex(N)).setPrev(N.Prev); + Dense[N.Prev].Next = N.Next; + + // Give back an end iterator that can be decremented + iterator I(this, N.Prev, ValIndexOf(N.Data)); + return ++I; + } + + // Otherwise, just drop us + Dense[N.Next].Prev = N.Prev; + Dense[N.Prev].Next = N.Next; + return iterator(this, N.Next, ValIndexOf(N.Data)); + } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h index 063c6755c680..267a340a7581 100644 --- a/include/llvm/ADT/SparseSet.h +++ b/include/llvm/ADT/SparseSet.h @@ -20,8 +20,8 @@ #ifndef LLVM_ADT_SPARSESET_H #define LLVM_ADT_SPARSESET_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataTypes.h" #include diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index b54d10b9dd33..26aac7bea627 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -51,7 +51,9 @@ public: //
Allow use of this class as the value itself. operator unsigned() const { return Value; } - const Statistic &operator=(unsigned Val) { + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + const Statistic &operator=(unsigned Val) { Value = Val; return init(); } @@ -106,6 +108,46 @@ public: return init(); } +#else // Statistics are disabled in release builds. + + const Statistic &operator=(unsigned Val) { + return *this; + } + + const Statistic &operator++() { + return *this; + } + + unsigned operator++(int) { + return 0; + } + + const Statistic &operator--() { + return *this; + } + + unsigned operator--(int) { + return 0; + } + + const Statistic &operator+=(const unsigned &V) { + return *this; + } + + const Statistic &operator-=(const unsigned &V) { + return *this; + } + + const Statistic &operator*=(const unsigned &V) { + return *this; + } + + const Statistic &operator/=(const unsigned &V) { + return *this; + } + +#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + protected: Statistic &init() { bool tmp = Initialized; diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index bf27c4313f82..d2887c5c2c56 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -14,8 +14,8 @@ #ifndef LLVM_ADT_STRINGEXTRAS_H #define LLVM_ADT_STRINGEXTRAS_H -#include "llvm/Support/DataTypes.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" namespace llvm { template class SmallVectorImpl; @@ -27,6 +27,17 @@ static inline char hexdigit(unsigned X, bool LowerCase = false) { return X < 10 ? '0' + X : HexChar + X - 10; } +/// Interpret the given character \p C as a hexadecimal digit and return its +/// value. +/// +/// If \p C is not a valid hex digit, -1U is returned. 
+static inline unsigned hexDigitValue(char C) { + if (C >= '0' && C <= '9') return C-'0'; + if (C >= 'a' && C <= 'f') return C-'a'+10U; + if (C >= 'A' && C <= 'F') return C-'A'+10U; + return -1U; +} + /// utohex_buffer - Emit the specified number into the buffer specified by /// BufferEnd, returning a pointer to the start of the string. This can be used /// like this: (note that the buffer must be large enough to handle any number): diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index b4497a276d0e..d01437b61c2b 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -53,7 +53,7 @@ public: class StringMapImpl { protected: // Array of NumBuckets pointers to entries, null pointers are holes. - // TheTable[NumBuckets] contains a sentinel value for easy iteration. Follwed + // TheTable[NumBuckets] contains a sentinel value for easy iteration. Followed // by an array of the actual hash values as unsigned integers. StringMapEntryBase **TheTable; unsigned NumBuckets; @@ -171,7 +171,6 @@ public: return Create(KeyStart, KeyEnd, Allocator, 0); } - /// Create - Create a StringMapEntry with normal malloc/free. template static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd, @@ -204,7 +203,6 @@ public: return *reinterpret_cast(Ptr); } - /// Destroy - Destroy this StringMapEntry, releasing memory back to the /// specified allocator. 
template @@ -239,6 +237,10 @@ public: explicit StringMap(AllocatorTy A) : StringMapImpl(static_cast(sizeof(MapEntryTy))), Allocator(A) {} + StringMap(unsigned InitialSize, AllocatorTy A) + : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))), + Allocator(A) {} + StringMap(const StringMap &RHS) : StringMapImpl(static_cast(sizeof(MapEntryTy))) { assert(RHS.empty() && @@ -290,7 +292,7 @@ public: return const_iterator(TheTable+Bucket, true); } - /// lookup - Return the entry for the specified key, or a default + /// lookup - Return the entry for the specified key, or a default /// constructed value if no such entry exists. ValueTy lookup(StringRef Key) const { const_iterator it = find(Key); @@ -336,8 +338,8 @@ public: StringMapEntryBase *&Bucket = TheTable[I]; if (Bucket && Bucket != getTombstoneVal()) { static_cast(Bucket)->Destroy(Allocator); - Bucket = 0; } + Bucket = 0; } NumItems = 0; @@ -427,7 +429,7 @@ public: return Ptr != RHS.Ptr; } - inline StringMapConstIterator& operator++() { // Preincrement + inline StringMapConstIterator& operator++() { // Preincrement ++Ptr; AdvancePastEmptyBuckets(); return *this; diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 292bde0cd900..224855e3f87c 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -11,7 +11,6 @@ #define LLVM_ADT_STRINGREF_H #include "llvm/Support/type_traits.h" - #include #include #include @@ -58,14 +57,14 @@ namespace llvm { // integer works around this bug. static size_t min(size_t a, size_t b) { return a < b ? a : b; } static size_t max(size_t a, size_t b) { return a > b ? 
a : b; } - + // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { if (Length == 0) { return 0; } return ::memcmp(Lhs,Rhs,Length); } - + public: /// @name Constructors /// @{ @@ -388,7 +387,7 @@ namespace llvm { Start = min(Start, Length); return StringRef(Data + Start, min(N, Length - Start)); } - + /// Return a StringRef equal to 'this' but with the first \p N elements /// dropped. StringRef drop_front(unsigned N = 1) const { @@ -536,7 +535,7 @@ namespace llvm { return LHS.compare(RHS) != -1; } - inline std::string &operator+=(std::string &buffer, llvm::StringRef string) { + inline std::string &operator+=(std::string &buffer, StringRef string) { return buffer.append(string.data(), string.size()); } diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index b69a964a23ba..7bea577f34d3 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -18,23 +18,25 @@ namespace llvm { - /// StringSet - A wrapper for StringMap that provides set-like - /// functionality. Only insert() and count() methods are used by my - /// code. + /// StringSet - A wrapper for StringMap that provides set-like functionality. template class StringSet : public llvm::StringMap { typedef llvm::StringMap base; public: - bool insert(StringRef InLang) { - assert(!InLang.empty()); - const char *KeyStart = InLang.data(); - const char *KeyEnd = KeyStart + InLang.size(); - llvm::StringMapEntry *Entry = llvm::StringMapEntry:: - Create(KeyStart, KeyEnd, base::getAllocator(), '+'); - if (!base::insert(Entry)) { - Entry->Destroy(base::getAllocator()); + + /// insert - Insert the specified key into the set. If the key already + /// exists in the set, return false and ignore the request, otherwise insert + /// it and return true. 
+ bool insert(StringRef Key) { + // Get or create the map entry for the key; if it doesn't exist the value + // type will be default constructed which we use to detect insert. + // + // We use '+' as the sentinel value in the map. + assert(!Key.empty()); + StringMapEntry &Entry = this->GetOrCreateValue(Key); + if (Entry.getValue() == '+') return false; - } + Entry.setValue('+'); return true; } }; diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index d3d33b8adde1..cc0e7b63819c 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -70,7 +70,7 @@ public: return *this; } -#if LLVM_USE_RVALUE_REFERENCES +#if LLVM_HAS_RVALUE_REFERENCES TinyPtrVector(TinyPtrVector &&RHS) : Val(RHS.Val) { RHS.Val = (EltTy)0; } diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 408d70cf76f8..8fac222c13e3 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -44,7 +44,7 @@ public: UnknownArch, arm, // ARM; arm, armv.*, xscale - cellspu, // CellSPU: spu, cellspu + aarch64, // AArch64: aarch64 hexagon, // Hexagon: hexagon mips, // MIPS: mips, mipsallegrex mipsel, // MIPSEL: mipsel, mipsallegrexel @@ -101,8 +101,8 @@ public: Haiku, Minix, RTEMS, - NativeClient, - CNK, // BG/P Compute-Node Kernel + NaCl, // Native Client + CNK, // BG/P Compute-Node Kernel Bitrig, AIX }; @@ -112,6 +112,7 @@ public: GNU, GNUEABI, GNUEABIHF, + GNUX32, EABI, MachO, Android, @@ -296,9 +297,14 @@ public: return getOS() == Triple::Darwin || getOS() == Triple::MacOSX; } + /// Is this an iOS triple. + bool isiOS() const { + return getOS() == Triple::IOS; + } + /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS). 
bool isOSDarwin() const { - return isMacOSX() || getOS() == Triple::IOS; + return isMacOSX() || isiOS(); } /// \brief Tests for either Cygwin or MinGW OS @@ -311,6 +317,11 @@ public: return getOS() == Triple::Win32 || isOSCygMing(); } + /// \brief Tests whether the OS is NaCl (Native Client) + bool isOSNaCl() const { + return getOS() == Triple::NaCl; + } + /// \brief Tests whether the OS uses the ELF binary format. bool isOSBinFormatELF() const { return !isOSDarwin() && !isOSWindows(); diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h index d23fccf3e8cc..b4fed7a0ebd2 100644 --- a/include/llvm/ADT/ValueMap.h +++ b/include/llvm/ADT/ValueMap.h @@ -27,10 +27,9 @@ #define LLVM_ADT_VALUEMAP_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Mutex.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/type_traits.h" -#include "llvm/Support/Mutex.h" - #include namespace llvm { diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h index a7f83a6bca9d..0497aa70887c 100644 --- a/include/llvm/ADT/VariadicFunction.h +++ b/include/llvm/ADT/VariadicFunction.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_VARIADIC_FUNCTION_H -#define LLVM_ADT_VARIADIC_FUNCTION_H +#ifndef LLVM_ADT_VARIADICFUNCTION_H +#define LLVM_ADT_VARIADICFUNCTION_H #include "llvm/ADT/ArrayRef.h" @@ -328,4 +328,4 @@ struct VariadicFunction3 { } // end namespace llvm -#endif // LLVM_ADT_VARIADIC_FUNCTION_H +#endif // LLVM_ADT_VARIADICFUNCTION_H diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h index 7f5cd1718142..71dab2ef551c 100644 --- a/include/llvm/ADT/ilist.h +++ b/include/llvm/ADT/ilist.h @@ -234,17 +234,17 @@ public: pointer getNodePtrUnchecked() const { return NodePtr; } }; -// do not implement. 
this is to catch errors when people try to use -// them as random access iterators +// These are to catch errors when people try to use them as random access +// iterators. template -void operator-(int, ilist_iterator); +void operator-(int, ilist_iterator) LLVM_DELETED_FUNCTION; template -void operator-(ilist_iterator,int); +void operator-(ilist_iterator,int) LLVM_DELETED_FUNCTION; template -void operator+(int, ilist_iterator); +void operator+(int, ilist_iterator) LLVM_DELETED_FUNCTION; template -void operator+(ilist_iterator,int); +void operator+(ilist_iterator,int) LLVM_DELETED_FUNCTION; // operator!=/operator== - Allow mixed comparisons without dereferencing // the iterator, which could very likely be pointing to end(). @@ -274,12 +274,12 @@ template struct simplify_type; template struct simplify_type > { typedef NodeTy* SimpleType; - static SimpleType getSimplifiedValue(const ilist_iterator &Node) { + static SimpleType getSimplifiedValue(ilist_iterator &Node) { return &*Node; } }; template struct simplify_type > { - typedef NodeTy* SimpleType; + typedef /*const*/ NodeTy* SimpleType; static SimpleType getSimplifiedValue(const ilist_iterator &Node) { return &*Node; @@ -465,6 +465,17 @@ public: return where; } + /// Remove all nodes from the list like clear(), but do not call + /// removeNodeFromList() or deleteNode(). + /// + /// This should only be used immediately before freeing nodes in bulk to + /// avoid traversing the list and bringing all the nodes into cache. + void clearAndLeakNodesUnsafely() { + if (Head) { + Head = getTail(); + this->setPrev(Head, Head); + } + } private: // transfer - The heart of the splice function. Move linked list nodes from @@ -472,6 +483,10 @@ private: // void transfer(iterator position, iplist &L2, iterator first, iterator last) { assert(first != last && "Should be checked by callers"); + // Position cannot be contained in the range to be transferred. + // Check for the most common mistake. 
+ assert(position != first && + "Insertion point can't be one of the transferred nodes"); if (position != last) { // Note: we have to be careful about the case when we move the first node diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h index f0080035cb88..03612440e7ac 100644 --- a/include/llvm/ADT/ilist_node.h +++ b/include/llvm/ADT/ilist_node.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_ILIST_NODE_H -#define LLVM_ADT_ILIST_NODE_H +#ifndef LLVM_ADT_ILISTNODE_H +#define LLVM_ADT_ILISTNODE_H namespace llvm { diff --git a/include/llvm/AddressingMode.h b/include/llvm/AddressingMode.h deleted file mode 100644 index 70b3c05238c5..000000000000 --- a/include/llvm/AddressingMode.h +++ /dev/null @@ -1,41 +0,0 @@ -//===--------- llvm/AddressingMode.h - Addressing Mode -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This file contains addressing mode data structures which are shared -// between LSR and a number of places in the codegen. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADDRESSING_MODE_H -#define LLVM_ADDRESSING_MODE_H - -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -class GlobalValue; - -/// AddrMode - This represents an addressing mode of: -/// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg -/// If BaseGV is null, there is no BaseGV. -/// If BaseOffs is zero, there is no base offset. -/// If HasBaseReg is false, there is no base register. -/// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with -/// no scale. 
-/// -struct AddrMode { - GlobalValue *BaseGV; - int64_t BaseOffs; - bool HasBaseReg; - int64_t Scale; - AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {} -}; - -} // End llvm namespace - -#endif diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index be274afd1552..d703f21c021c 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -34,11 +34,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_ALIAS_ANALYSIS_H -#define LLVM_ANALYSIS_ALIAS_ANALYSIS_H +#ifndef LLVM_ANALYSIS_ALIASANALYSIS_H +#define LLVM_ANALYSIS_ALIASANALYSIS_H -#include "llvm/Support/CallSite.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/CallSite.h" namespace llvm { @@ -373,7 +373,7 @@ public: return getModRefInfo(I, Location(P, Size)); } - /// getModRefInfo (for call sites) - Return whether information about whether + /// getModRefInfo (for call sites) - Return information about whether /// a particular call site modifies or reads the specified memory location. virtual ModRefResult getModRefInfo(ImmutableCallSite CS, const Location &Loc); @@ -384,7 +384,7 @@ public: return getModRefInfo(CS, Location(P, Size)); } - /// getModRefInfo (for calls) - Return whether information about whether + /// getModRefInfo (for calls) - Return information about whether /// a particular call modifies or reads the specified memory location. ModRefResult getModRefInfo(const CallInst *C, const Location &Loc) { return getModRefInfo(ImmutableCallSite(C), Loc); @@ -395,7 +395,7 @@ public: return getModRefInfo(C, Location(P, Size)); } - /// getModRefInfo (for invokes) - Return whether information about whether + /// getModRefInfo (for invokes) - Return information about whether /// a particular invoke modifies or reads the specified memory location. 
ModRefResult getModRefInfo(const InvokeInst *I, const Location &Loc) { @@ -408,7 +408,7 @@ public: return getModRefInfo(I, Location(P, Size)); } - /// getModRefInfo (for loads) - Return whether information about whether + /// getModRefInfo (for loads) - Return information about whether /// a particular load modifies or reads the specified memory location. ModRefResult getModRefInfo(const LoadInst *L, const Location &Loc); @@ -417,7 +417,7 @@ public: return getModRefInfo(L, Location(P, Size)); } - /// getModRefInfo (for stores) - Return whether information about whether + /// getModRefInfo (for stores) - Return information about whether /// a particular store modifies or reads the specified memory location. ModRefResult getModRefInfo(const StoreInst *S, const Location &Loc); @@ -426,7 +426,7 @@ public: return getModRefInfo(S, Location(P, Size)); } - /// getModRefInfo (for fences) - Return whether information about whether + /// getModRefInfo (for fences) - Return information about whether /// a particular store modifies or reads the specified memory location. ModRefResult getModRefInfo(const FenceInst *S, const Location &Loc) { // Conservatively correct. (We could possibly be a bit smarter if @@ -439,7 +439,7 @@ public: return getModRefInfo(S, Location(P, Size)); } - /// getModRefInfo (for cmpxchges) - Return whether information about whether + /// getModRefInfo (for cmpxchges) - Return information about whether /// a particular cmpxchg modifies or reads the specified memory location. ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc); @@ -449,7 +449,7 @@ public: return getModRefInfo(CX, Location(P, Size)); } - /// getModRefInfo (for atomicrmws) - Return whether information about whether + /// getModRefInfo (for atomicrmws) - Return information about whether /// a particular atomicrmw modifies or reads the specified memory location. 
ModRefResult getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc); @@ -459,7 +459,7 @@ public: return getModRefInfo(RMW, Location(P, Size)); } - /// getModRefInfo (for va_args) - Return whether information about whether + /// getModRefInfo (for va_args) - Return information about whether /// a particular va_arg modifies or reads the specified memory location. ModRefResult getModRefInfo(const VAArgInst* I, const Location &Loc); @@ -587,17 +587,12 @@ bool isNoAliasCall(const Value *V); /// isIdentifiedObject - Return true if this pointer refers to a distinct and /// identifiable object. This returns true for: /// Global Variables and Functions (but not Global Aliases) -/// Allocas and Mallocs +/// Allocas /// ByVal and NoAlias Arguments -/// NoAlias returns +/// NoAlias returns (e.g. calls to malloc) /// bool isIdentifiedObject(const Value *V); -/// isKnownNonNull - Return true if this pointer couldn't possibly be null by -/// its definition. This returns true for allocas, non-extern-weak globals and -/// byval arguments. 
-bool isKnownNonNull(const Value *V); - } // End llvm namespace #endif diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 1e606c81d9c7..da007072e559 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -17,11 +17,10 @@ #ifndef LLVM_ANALYSIS_ALIASSETTRACKER_H #define LLVM_ANALYSIS_ALIASSETTRACKER_H -#include "llvm/Support/CallSite.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Support/ValueHandle.h" #include namespace llvm { diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h index 5168ab78729b..b3e2d18eb2c6 100644 --- a/include/llvm/Analysis/BlockFrequencyImpl.h +++ b/include/llvm/Analysis/BlockFrequencyImpl.h @@ -14,17 +14,17 @@ #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H #define LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H -#include "llvm/BasicBlock.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #include +#include namespace llvm { @@ -271,7 +271,7 @@ class BlockFrequencyImpl { BlockT *EntryBlock = fn->begin(); - copy(po_begin(EntryBlock), po_end(EntryBlock), back_inserter(POT)); + std::copy(po_begin(EntryBlock), po_end(EntryBlock), std::back_inserter(POT)); unsigned RPOidx = 0; for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) { diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index c0567daa3a5e..6c23f7c3aeb3 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -14,10 +14,10 @@ #ifndef 
LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H #define LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" namespace llvm { diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h index 4704a929acf6..fa596c3a3c99 100644 --- a/include/llvm/Analysis/CFGPrinter.h +++ b/include/llvm/Analysis/CFGPrinter.h @@ -15,10 +15,10 @@ #ifndef LLVM_ANALYSIS_CFGPRINTER_H #define LLVM_ANALYSIS_CFGPRINTER_H -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/CFG.h" #include "llvm/Support/GraphWriter.h" diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index 6a9ed310375a..591484dd2782 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -51,13 +51,13 @@ #ifndef LLVM_ANALYSIS_CALLGRAPH_H #define LLVM_ANALYSIS_CALLGRAPH_H -#include "llvm/Function.h" -#include "llvm/Pass.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/IncludeFile.h" +#include "llvm/Support/ValueHandle.h" #include namespace llvm { diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h similarity index 96% rename from include/llvm/CallGraphSCCPass.h rename to include/llvm/Analysis/CallGraphSCCPass.h index 7154aa3259d2..e609dac11891 100644 --- a/include/llvm/CallGraphSCCPass.h +++ b/include/llvm/Analysis/CallGraphSCCPass.h @@ -18,11 +18,11 @@ // //===----------------------------------------------------------------------===// -#ifndef 
LLVM_CALL_GRAPH_SCC_PASS_H -#define LLVM_CALL_GRAPH_SCC_PASS_H +#ifndef LLVM_ANALYSIS_CALLGRAPHSCCPASS_H +#define LLVM_ANALYSIS_CALLGRAPHSCCPASS_H -#include "llvm/Pass.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Pass.h" namespace llvm { @@ -39,6 +39,9 @@ public: /// corresponding to a CallGraph. Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + /// doInitialization - This method is called before the SCC's of the program /// has been processed, allowing the pass to do initialization as necessary. virtual bool doInitialization(CallGraph &CG) { diff --git a/include/llvm/Analysis/CallPrinter.h b/include/llvm/Analysis/CallPrinter.h new file mode 100644 index 000000000000..5f5d160c3ca0 --- /dev/null +++ b/include/llvm/Analysis/CallPrinter.h @@ -0,0 +1,27 @@ +//===-- CallPrinter.h - Call graph printer external interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines external functions that can be called to explicitly +// instantiate the call graph printer. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CALLPRINTER_H +#define LLVM_ANALYSIS_CALLPRINTER_H + +namespace llvm { + + class ModulePass; + + ModulePass *createCallGraphViewerPass(); + ModulePass *createCallGraphPrinterPass(); + +} // end namespace llvm + +#endif diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index 2889269b957a..8edabfe860a1 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -14,12 +14,11 @@ #ifndef LLVM_ANALYSIS_CAPTURETRACKING_H #define LLVM_ANALYSIS_CAPTURETRACKING_H -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Support/CallSite.h" - namespace llvm { + + class Value; + class Use; + /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can /// be expensive, so consider caching the results. The boolean ReturnCaptures diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h index 4398faa20a7b..086934d0e69b 100644 --- a/include/llvm/Analysis/CodeMetrics.h +++ b/include/llvm/Analysis/CodeMetrics.h @@ -19,77 +19,75 @@ #include "llvm/Support/CallSite.h" namespace llvm { - class BasicBlock; - class Function; - class Instruction; - class DataLayout; - class Value; +class BasicBlock; +class Function; +class Instruction; +class DataLayout; +class TargetTransformInfo; +class Value; - /// \brief Check whether an instruction is likely to be "free" when lowered. - bool isInstructionFree(const Instruction *I, const DataLayout *TD = 0); +/// \brief Check whether a call will lower to something small. +/// +/// This tests checks whether this callsite will lower to something +/// significantly cheaper than a traditional call, often a single +/// instruction. 
Note that if isInstructionFree(CS.getInstruction()) would +/// return true, so will this function. +bool callIsSmall(ImmutableCallSite CS); - /// \brief Check whether a call will lower to something small. +/// \brief Utility to calculate the size and a few similar metrics for a set +/// of basic blocks. +struct CodeMetrics { + /// \brief True if this function contains a call to setjmp or other functions + /// with attribute "returns twice" without having the attribute itself. + bool exposesReturnsTwice; + + /// \brief True if this function calls itself. + bool isRecursive; + + /// \brief True if this function cannot be duplicated. /// - /// This tests checks whether this callsite will lower to something - /// significantly cheaper than a traditional call, often a single - /// instruction. Note that if isInstructionFree(CS.getInstruction()) would - /// return true, so will this function. - bool callIsSmall(ImmutableCallSite CS); + /// True if this function contains one or more indirect branches, or it contains + /// one or more 'noduplicate' instructions. + bool notDuplicatable; - /// \brief Utility to calculate the size and a few similar metrics for a set - /// of basic blocks. - struct CodeMetrics { - /// \brief True if this function contains a call to setjmp or other functions - /// with attribute "returns twice" without having the attribute itself. - bool exposesReturnsTwice; + /// \brief True if this function calls alloca (in the C sense). + bool usesDynamicAlloca; - /// \brief True if this function calls itself. - bool isRecursive; + /// \brief Number of instructions in the analyzed blocks. + unsigned NumInsts; - /// \brief True if this function contains one or more indirect branches. - bool containsIndirectBr; + /// \brief Number of analyzed blocks. + unsigned NumBlocks; - /// \brief True if this function calls alloca (in the C sense). - bool usesDynamicAlloca; + /// \brief Keeps track of basic block code size estimates. 
+ DenseMap NumBBInsts; - /// \brief Number of instructions in the analyzed blocks. - unsigned NumInsts; + /// \brief Keep track of the number of calls to 'big' functions. + unsigned NumCalls; - /// \brief Number of analyzed blocks. - unsigned NumBlocks; + /// \brief The number of calls to internal functions with a single caller. + /// + /// These are likely targets for future inlining, likely exposed by + /// interleaved devirtualization. + unsigned NumInlineCandidates; - /// \brief Keeps track of basic block code size estimates. - DenseMap NumBBInsts; + /// \brief How many instructions produce vector values. + /// + /// The inliner is more aggressive with inlining vector kernels. + unsigned NumVectorInsts; - /// \brief Keep track of the number of calls to 'big' functions. - unsigned NumCalls; + /// \brief How many 'ret' instructions the blocks contain. + unsigned NumRets; - /// \brief The number of calls to internal functions with a single caller. - /// - /// These are likely targets for future inlining, likely exposed by - /// interleaved devirtualization. - unsigned NumInlineCandidates; + CodeMetrics() + : exposesReturnsTwice(false), isRecursive(false), notDuplicatable(false), + usesDynamicAlloca(false), NumInsts(0), NumBlocks(0), NumCalls(0), + NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {} - /// \brief How many instructions produce vector values. - /// - /// The inliner is more aggressive with inlining vector kernels. - unsigned NumVectorInsts; + /// \brief Add information about a block to the current state. + void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI); +}; - /// \brief How many 'ret' instructions the blocks contain. 
- unsigned NumRets; - - CodeMetrics() : exposesReturnsTwice(false), isRecursive(false), - containsIndirectBr(false), usesDynamicAlloca(false), - NumInsts(0), NumBlocks(0), NumCalls(0), - NumInlineCandidates(0), NumVectorInsts(0), - NumRets(0) {} - - /// \brief Add information about a block to the current state. - void analyzeBasicBlock(const BasicBlock *BB, const DataLayout *TD = 0); - - /// \brief Add information about a function to the current state. - void analyzeFunction(Function *F, const DataLayout *TD = 0); - }; } #endif diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h index b701b8fca5d4..0fc1c2dc360d 100644 --- a/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -11,27 +11,25 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H -#define LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H +#ifndef LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H +#define LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H -#include "llvm/Pass.h" #include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Pass.h" namespace llvm { -template -struct DOTGraphTraitsViewer : public FunctionPass { - std::string Name; - DOTGraphTraitsViewer(std::string GraphName, char &ID) : FunctionPass(ID) { - Name = GraphName; - } +template +class DOTGraphTraitsViewer : public FunctionPass { +public: + DOTGraphTraitsViewer(StringRef GraphName, char &ID) + : FunctionPass(ID), Name(GraphName) {} virtual bool runOnFunction(Function &F) { - Analysis *Graph; - std::string Title, GraphName; - Graph = &getAnalysis(); - GraphName = DOTGraphTraits::getGraphName(Graph); - Title = GraphName + " for '" + F.getName().str() + "' function"; + Analysis *Graph = &getAnalysis(); + std::string GraphName = DOTGraphTraits::getGraphName(Graph); + std::string Title = GraphName + " for '" + F.getName().str() + "' function"; + ViewGraph(Graph, Name, Simple, Title); return false; @@ -41,36 +39,34 @@ 
struct DOTGraphTraitsViewer : public FunctionPass { AU.setPreservesAll(); AU.addRequired(); } + +private: + std::string Name; }; template -struct DOTGraphTraitsPrinter : public FunctionPass { - - std::string Name; - - DOTGraphTraitsPrinter(std::string GraphName, char &ID) - : FunctionPass(ID) { - Name = GraphName; - } +class DOTGraphTraitsPrinter : public FunctionPass { +public: + DOTGraphTraitsPrinter(StringRef GraphName, char &ID) + : FunctionPass(ID), Name(GraphName) {} virtual bool runOnFunction(Function &F) { - Analysis *Graph; + Analysis *Graph = &getAnalysis(); std::string Filename = Name + "." + F.getName().str() + ".dot"; + std::string ErrorInfo; + errs() << "Writing '" << Filename << "'..."; - std::string ErrorInfo; raw_fd_ostream File(Filename.c_str(), ErrorInfo); - Graph = &getAnalysis(); - - std::string Title, GraphName; - GraphName = DOTGraphTraits::getGraphName(Graph); - Title = GraphName + " for '" + F.getName().str() + "' function"; + std::string GraphName = DOTGraphTraits::getGraphName(Graph); + std::string Title = GraphName + " for '" + F.getName().str() + "' function"; if (ErrorInfo.empty()) WriteGraph(File, Graph, Simple, Title); else errs() << " error opening file for writing!"; errs() << "\n"; + return false; } @@ -78,6 +74,69 @@ struct DOTGraphTraitsPrinter : public FunctionPass { AU.setPreservesAll(); AU.addRequired(); } + +private: + std::string Name; }; -} + +template +class DOTGraphTraitsModuleViewer : public ModulePass { +public: + DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID) + : ModulePass(ID), Name(GraphName) {} + + virtual bool runOnModule(Module &M) { + Analysis *Graph = &getAnalysis(); + std::string Title = DOTGraphTraits::getGraphName(Graph); + + ViewGraph(Graph, Name, Simple, Title); + + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + } + +private: + std::string Name; +}; + +template +class DOTGraphTraitsModulePrinter : public ModulePass { 
+public: + DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID) + : ModulePass(ID), Name(GraphName) {} + + virtual bool runOnModule(Module &M) { + Analysis *Graph = &getAnalysis(); + std::string Filename = Name + ".dot"; + std::string ErrorInfo; + + errs() << "Writing '" << Filename << "'..."; + + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + std::string Title = DOTGraphTraits::getGraphName(Graph); + + if (ErrorInfo.empty()) + WriteGraph(File, Graph, Simple, Title); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + } + +private: + std::string Name; +}; + +} // end namespace llvm + #endif diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index b4327eeb0b1e..a78ac5919acb 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -18,6 +18,16 @@ // of memory references in a function, returning either NULL, for no dependence, // or a more-or-less detailed description of the dependence between them. // +// This pass exists to support the DependenceGraph pass. There are two separate +// passes because there's a useful separation of concerns. A dependence exists +// if two conditions are met: +// +// 1) Two instructions reference the same memory location, and +// 2) There is a flow of control leading from one instruction to the other. +// +// DependenceAnalysis attacks the first condition; DependenceGraph will attack +// the second (it's not yet ready). +// // Please note that this is work in progress and the interface is subject to // change. 
// @@ -30,9 +40,9 @@ #ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H #define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H -#include "llvm/Instructions.h" -#include "llvm/Pass.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" namespace llvm { class AliasAnalysis; @@ -53,8 +63,8 @@ namespace llvm { /// input dependences are unordered. class Dependence { public: - Dependence(const Instruction *Source, - const Instruction *Destination) : + Dependence(Instruction *Source, + Instruction *Destination) : Src(Source), Dst(Destination) {} virtual ~Dependence() {} @@ -82,11 +92,11 @@ namespace llvm { /// getSrc - Returns the source instruction for this dependence. /// - const Instruction *getSrc() const { return Src; } + Instruction *getSrc() const { return Src; } /// getDst - Returns the destination instruction for this dependence. /// - const Instruction *getDst() const { return Dst; } + Instruction *getDst() const { return Dst; } /// isInput - Returns true if this is an input dependence. /// @@ -158,14 +168,14 @@ namespace llvm { /// void dump(raw_ostream &OS) const; private: - const Instruction *Src, *Dst; + Instruction *Src, *Dst; friend class DependenceAnalysis; }; /// FullDependence - This class represents a dependence between two memory /// references in a function. It contains detailed information about the - /// dependence (direction vectors, etc) and is used when the compiler is + /// dependence (direction vectors, etc.) and is used when the compiler is /// able to accurately analyze the interaction of the references; that is, /// it is not a confused dependence (see Dependence). In most cases /// (for output, flow, and anti dependences), the dependence implies an @@ -173,12 +183,12 @@ namespace llvm { /// input dependences are unordered. 
class FullDependence : public Dependence { public: - FullDependence(const Instruction *Src, - const Instruction *Dst, + FullDependence(Instruction *Src, + Instruction *Dst, bool LoopIndependent, unsigned Levels); ~FullDependence() { - delete DV; + delete[] DV; } /// isLoopIndependent - Returns true if this is a loop-independent @@ -234,8 +244,8 @@ namespace llvm { /// DependenceAnalysis - This class is the main dependence-analysis driver. /// class DependenceAnalysis : public FunctionPass { - void operator=(const DependenceAnalysis &); // do not implement - DependenceAnalysis(const DependenceAnalysis &); // do not implement + void operator=(const DependenceAnalysis &) LLVM_DELETED_FUNCTION; + DependenceAnalysis(const DependenceAnalysis &) LLVM_DELETED_FUNCTION; public: /// depends - Tests for a dependence between the Src and Dst instructions. /// Returns NULL if no dependence; otherwise, returns a Dependence (or a @@ -243,11 +253,11 @@ namespace llvm { /// The flag PossiblyLoopIndependent should be set by the caller /// if it appears that control flow can reach from Src to Dst /// without traversing a loop back edge. - Dependence *depends(const Instruction *Src, - const Instruction *Dst, + Dependence *depends(Instruction *Src, + Instruction *Dst, bool PossiblyLoopIndependent); - /// getSplitIteration - Give a dependence that's splitable at some + /// getSplitIteration - Give a dependence that's splittable at some /// particular level, return the iteration that should be used to split /// the loop. 
/// diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h index 0c29236dde96..c0f95cbd9b9b 100644 --- a/include/llvm/Analysis/DominatorInternals.h +++ b/include/llvm/Analysis/DominatorInternals.h @@ -10,8 +10,8 @@ #ifndef LLVM_ANALYSIS_DOMINATOR_INTERNALS_H #define LLVM_ANALYSIS_DOMINATOR_INTERNALS_H -#include "llvm/Analysis/Dominators.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/Dominators.h" //===----------------------------------------------------------------------===// // diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h index 8940971558a3..81c04bb6b0fa 100644 --- a/include/llvm/Analysis/Dominators.h +++ b/include/llvm/Analysis/Dominators.h @@ -15,13 +15,13 @@ #ifndef LLVM_ANALYSIS_DOMINATORS_H #define LLVM_ANALYSIS_DOMINATORS_H -#include "llvm/Pass.h" -#include "llvm/Function.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" @@ -101,18 +101,18 @@ public: Children.clear(); } - bool compare(DomTreeNodeBase *Other) { + bool compare(const DomTreeNodeBase *Other) const { if (getNumChildren() != Other->getNumChildren()) return true; - SmallPtrSet OtherChildren; - for (iterator I = Other->begin(), E = Other->end(); I != E; ++I) { - NodeT *Nd = (*I)->getBlock(); + SmallPtrSet OtherChildren; + for (const_iterator I = Other->begin(), E = Other->end(); I != E; ++I) { + const NodeT *Nd = (*I)->getBlock(); OtherChildren.insert(Nd); } - for (iterator I = begin(), E = end(); I != E; ++I) { - NodeT *N = (*I)->getBlock(); + for (const_iterator I = begin(), E = end(); I != E; ++I) { + const NodeT *N = (*I)->getBlock(); if (OtherChildren.count(N) == 0) return true; } @@ -663,8 +663,7 @@ public: // 
Initialize the roots list for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F), E = TraitsTy::nodes_end(&F); I != E; ++I) { - if (std::distance(TraitsTy::child_begin(I), - TraitsTy::child_end(I)) == 0) + if (TraitsTy::child_begin(I) == TraitsTy::child_end(I)) addRoot(I); // Prepopulate maps so that we don't get iterator invalidation issues later. diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index 9b98013a1683..c9828015be29 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -24,7 +24,6 @@ namespace llvm { class DominatorTree; class Instruction; class Value; -class IVUsers; class ScalarEvolution; class SCEV; class IVUsers; diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index a075db33427d..bc7924e10fdc 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -14,122 +14,130 @@ #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H -#include "llvm/Function.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/ValueMap.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include #include -#include namespace llvm { +class CallSite; +class DataLayout; +class Function; +class TargetTransformInfo; - class CallSite; - class DataLayout; +namespace InlineConstants { + // Various magic constants used to adjust heuristics. + const int InstrCost = 5; + const int IndirectCallThreshold = 100; + const int CallPenalty = 25; + const int LastCallToStaticBonus = -15000; + const int ColdccPenalty = 2000; + const int NoreturnPenalty = 10000; + /// Do not inline functions which allocate this many bytes on the stack + /// when the caller is recursive. + const unsigned TotalAllocaSizeRecursiveCaller = 1024; +} - namespace InlineConstants { - // Various magic constants used to adjust heuristics. 
- const int InstrCost = 5; - const int IndirectCallThreshold = 100; - const int CallPenalty = 25; - const int LastCallToStaticBonus = -15000; - const int ColdccPenalty = 2000; - const int NoreturnPenalty = 10000; - /// Do not inline functions which allocate this many bytes on the stack - /// when the caller is recursive. - const unsigned TotalAllocaSizeRecursiveCaller = 1024; +/// \brief Represents the cost of inlining a function. +/// +/// This supports special values for functions which should "always" or +/// "never" be inlined. Otherwise, the cost represents a unitless amount; +/// smaller values increase the likelihood of the function being inlined. +/// +/// Objects of this type also provide the adjusted threshold for inlining +/// based on the information available for a particular callsite. They can be +/// directly tested to determine if inlining should occur given the cost and +/// threshold for this cost metric. +class InlineCost { + enum SentinelValues { + AlwaysInlineCost = INT_MIN, + NeverInlineCost = INT_MAX + }; + + /// \brief The estimated cost of inlining this callsite. + const int Cost; + + /// \brief The adjusted threshold against which this cost was computed. + const int Threshold; + + // Trivial constructor, interesting logic in the factory functions below. + InlineCost(int Cost, int Threshold) : Cost(Cost), Threshold(Threshold) {} + +public: + static InlineCost get(int Cost, int Threshold) { + assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value"); + assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); + return InlineCost(Cost, Threshold); + } + static InlineCost getAlways() { + return InlineCost(AlwaysInlineCost, 0); + } + static InlineCost getNever() { + return InlineCost(NeverInlineCost, 0); } - /// \brief Represents the cost of inlining a function. + /// \brief Test whether the inline cost is low enough for inlining. 
+ operator bool() const { + return Cost < Threshold; + } + + bool isAlways() const { return Cost == AlwaysInlineCost; } + bool isNever() const { return Cost == NeverInlineCost; } + bool isVariable() const { return !isAlways() && !isNever(); } + + /// \brief Get the inline cost estimate. + /// It is an error to call this on an "always" or "never" InlineCost. + int getCost() const { + assert(isVariable() && "Invalid access of InlineCost"); + return Cost; + } + + /// \brief Get the cost delta from the threshold for inlining. + /// Only valid if the cost is of the variable kind. Returns a negative + /// value if the cost is too high to inline. + int getCostDelta() const { return Threshold - getCost(); } +}; + +/// \brief Cost analyzer used by inliner. +class InlineCostAnalysis : public CallGraphSCCPass { + const DataLayout *TD; + const TargetTransformInfo *TTI; + +public: + static char ID; + + InlineCostAnalysis(); + ~InlineCostAnalysis(); + + // Pass interface implementation. + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnSCC(CallGraphSCC &SCC); + + /// \brief Get an InlineCost object representing the cost of inlining this + /// callsite. /// - /// This supports special values for functions which should "always" or - /// "never" be inlined. Otherwise, the cost represents a unitless amount; - /// smaller values increase the likelihood of the function being inlined. + /// Note that threshold is passed into this function. Only costs below the + /// threshold are computed with any accuracy. The threshold can be used to + /// bound the computation necessary to determine whether the cost is + /// sufficiently low to warrant inlining. /// - /// Objects of this type also provide the adjusted threshold for inlining - /// based on the information available for a particular callsite. They can be - /// directly tested to determine if inlining should occur given the cost and - /// threshold for this cost metric. 
- class InlineCost { - enum SentinelValues { - AlwaysInlineCost = INT_MIN, - NeverInlineCost = INT_MAX - }; + /// Also note that calling this function *dynamically* computes the cost of + /// inlining the callsite. It is an expensive, heavyweight call. + InlineCost getInlineCost(CallSite CS, int Threshold); - /// \brief The estimated cost of inlining this callsite. - const int Cost; + /// \brief Get an InlineCost with the callee explicitly specified. + /// This allows you to calculate the cost of inlining a function via a + /// pointer. This behaves exactly as the version with no explicit callee + /// parameter in all other respects. + // + // Note: This is used by out-of-tree passes, please do not remove without + // adding a replacement API. + InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold); - /// \brief The adjusted threshold against which this cost was computed. - const int Threshold; + /// \brief Minimal filter to detect invalid constructs for inlining. + bool isInlineViable(Function &Callee); +}; - // Trivial constructor, interesting logic in the factory functions below. - InlineCost(int Cost, int Threshold) - : Cost(Cost), Threshold(Threshold) {} - - public: - static InlineCost get(int Cost, int Threshold) { - assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value"); - assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); - return InlineCost(Cost, Threshold); - } - static InlineCost getAlways() { - return InlineCost(AlwaysInlineCost, 0); - } - static InlineCost getNever() { - return InlineCost(NeverInlineCost, 0); - } - - /// \brief Test whether the inline cost is low enough for inlining. - operator bool() const { - return Cost < Threshold; - } - - bool isAlways() const { return Cost == AlwaysInlineCost; } - bool isNever() const { return Cost == NeverInlineCost; } - bool isVariable() const { return !isAlways() && !isNever(); } - - /// \brief Get the inline cost estimate. 
- /// It is an error to call this on an "always" or "never" InlineCost. - int getCost() const { - assert(isVariable() && "Invalid access of InlineCost"); - return Cost; - } - - /// \brief Get the cost delta from the threshold for inlining. - /// Only valid if the cost is of the variable kind. Returns a negative - /// value if the cost is too high to inline. - int getCostDelta() const { return Threshold - getCost(); } - }; - - /// InlineCostAnalyzer - Cost analyzer used by inliner. - class InlineCostAnalyzer { - // DataLayout if available, or null. - const DataLayout *TD; - - public: - InlineCostAnalyzer(): TD(0) {} - - void setDataLayout(const DataLayout *TData) { TD = TData; } - - /// \brief Get an InlineCost object representing the cost of inlining this - /// callsite. - /// - /// Note that threshold is passed into this function. Only costs below the - /// threshold are computed with any accuracy. The threshold can be used to - /// bound the computation necessary to determine whether the cost is - /// sufficiently low to warrant inlining. - InlineCost getInlineCost(CallSite CS, int Threshold); - /// getCalledFunction - The heuristic used to determine if we should inline - /// the function call or not. The callee is explicitly specified, to allow - /// you to calculate the cost of inlining a function via a pointer. This - /// behaves exactly as the version with no explicit callee parameter in all - /// other respects. - // - // Note: This is used by out-of-tree passes, please do not remove without - // adding a replacement API. - InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold); - }; } #endif diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index e561e3742b64..d760a4cba1cf 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -14,17 +14,33 @@ // ("and i32 %x, %x" -> "%x"). 
If the simplification is also an instruction // then it dominates the original instruction. // +// These routines implicitly resolve undef uses. The easiest way to be safe when +// using these routines to obtain simplified values for existing instructions is +// to always replace all uses of the instructions with the resulting simplified +// values. This will prevent other code from seeing the same undef uses and +// resolving them to different values. +// +// These routines are designed to tolerate moderately incomplete IR, such as +// instructions that are not connected to basic blocks yet. However, they do +// require that all the IR that they encounter be valid. In particular, they +// require that all non-constant values be defined in the same function, and the +// same call context of that function (and not split between caller and callee +// contexts of a directly recursive call, for example). +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H +#include "llvm/IR/User.h" + namespace llvm { template class ArrayRef; class DominatorTree; class Instruction; class DataLayout; + class FastMathFlags; class TargetLibraryInfo; class Type; class Value; @@ -43,6 +59,28 @@ namespace llvm { const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); + /// Given operands for an FAdd, see if we can fold the result. If not, this + /// returns null. + Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const DataLayout *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); + + /// Given operands for an FSub, see if we can fold the result. If not, this + /// returns null. + Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const DataLayout *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); + + /// Given operands for an FMul, see if we can fold the result. 
If not, this + /// returns null. + Value *SimplifyFMulInst(Value *LHS, Value *RHS, + FastMathFlags FMF, + const DataLayout *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); + /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, @@ -57,7 +95,7 @@ namespace llvm { /// SimplifyUDivInst - Given operands for a UDiv, see if we can /// fold the result. If not, this returns null. - Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, + Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); @@ -69,7 +107,7 @@ namespace llvm { /// SimplifySRemInst - Given operands for an SRem, see if we can /// fold the result. If not, this returns null. - Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, + Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0, const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); @@ -88,7 +126,7 @@ namespace llvm { /// SimplifyShlInst - Given operands for a Shl, see if we can /// fold the result. If not, this returns null. Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *TD = 0, + const DataLayout *TD = 0, const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); @@ -127,14 +165,14 @@ namespace llvm { /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD = 0, + const DataLayout *TD = 0, const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. 
Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD = 0, + const DataLayout *TD = 0, const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); @@ -178,10 +216,28 @@ namespace llvm { /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. If not, this returns null. Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout *TD = 0, + const DataLayout *TD = 0, const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); + /// \brief Given a function and iterators over arguments, see if we can fold + /// the result. + /// + /// If this call could not be simplified returns null. + Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const DataLayout *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); + + /// \brief Given a function and set of arguments, see if we can fold the + /// result. + /// + /// If this call could not be simplified returns null. + Value *SimplifyCall(Value *V, ArrayRef Args, + const DataLayout *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); + /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. 
Value *SimplifyInstruction(Instruction *I, const DataLayout *TD = 0, diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h index ca8ad73131a9..5ce1260eca1f 100644 --- a/include/llvm/Analysis/Interval.h +++ b/include/llvm/Analysis/Interval.h @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INTERVAL_H -#define LLVM_INTERVAL_H +#ifndef LLVM_ANALYSIS_INTERVAL_H +#define LLVM_ANALYSIS_INTERVAL_H #include "llvm/ADT/GraphTraits.h" #include diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h index 0968c7468e68..22067c4f3c82 100644 --- a/include/llvm/Analysis/IntervalIterator.h +++ b/include/llvm/Analysis/IntervalIterator.h @@ -30,11 +30,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INTERVAL_ITERATOR_H -#define LLVM_INTERVAL_ITERATOR_H +#ifndef LLVM_ANALYSIS_INTERVALITERATOR_H +#define LLVM_ANALYSIS_INTERVALITERATOR_H #include "llvm/Analysis/IntervalPartition.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CFG.h" #include #include @@ -157,7 +157,7 @@ public: private: // ProcessInterval - This method is used during the construction of the // interval graph. It walks through the source graph, recursively creating - // an interval per invokation until the entire graph is covered. This uses + // an interval per invocation until the entire graph is covered. This uses // the ProcessNode method to add all of the nodes to the interval. 
// // This method is templated because it may operate on two different source diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h index bce84be2f4fd..8cade58cd324 100644 --- a/include/llvm/Analysis/IntervalPartition.h +++ b/include/llvm/Analysis/IntervalPartition.h @@ -20,8 +20,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INTERVAL_PARTITION_H -#define LLVM_INTERVAL_PARTITION_H +#ifndef LLVM_ANALYSIS_INTERVALPARTITION_H +#define LLVM_ANALYSIS_INTERVALPARTITION_H #include "llvm/Analysis/Interval.h" #include "llvm/Pass.h" diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h index 243234b75635..c01b210acf4b 100644 --- a/include/llvm/Analysis/LibCallAliasAnalysis.h +++ b/include/llvm/Analysis/LibCallAliasAnalysis.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LIBCALL_AA_H -#define LLVM_ANALYSIS_LIBCALL_AA_H +#ifndef LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H +#define LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Pass.h" diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index afc90c2f7441..ebcb76254111 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -14,7 +14,7 @@ #ifndef LLVM_ANALYSIS_LOADS_H #define LLVM_ANALYSIS_LOADS_H -#include "llvm/BasicBlock.h" +#include "llvm/IR/BasicBlock.h" namespace llvm { diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index c5d7b0128e74..783e347522d4 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -27,21 +27,16 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LOOP_INFO_H -#define LLVM_ANALYSIS_LOOP_INFO_H +#ifndef LLVM_ANALYSIS_LOOPINFO_H +#define LLVM_ANALYSIS_LOOPINFO_H 
-#include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" #include -#include namespace llvm { @@ -56,6 +51,7 @@ class DominatorTree; class LoopInfo; class Loop; class PHINode; +class raw_ostream; template class LoopInfoBase; template class LoopBase; @@ -151,10 +147,10 @@ public: /// block that is outside of the current loop. /// bool isLoopExiting(const BlockT *BB) const { - typedef GraphTraits BlockTraits; + typedef GraphTraits BlockTraits; for (typename BlockTraits::ChildIteratorType SI = - BlockTraits::child_begin(const_cast(BB)), - SE = BlockTraits::child_end(const_cast(BB)); SI != SE; ++SI) { + BlockTraits::child_begin(BB), + SE = BlockTraits::child_end(BB); SI != SE; ++SI) { if (!contains(*SI)) return true; } @@ -169,8 +165,8 @@ public: typedef GraphTraits > InvBlockTraits; for (typename InvBlockTraits::ChildIteratorType I = - InvBlockTraits::child_begin(const_cast(H)), - E = InvBlockTraits::child_end(const_cast(H)); I != E; ++I) + InvBlockTraits::child_begin(H), + E = InvBlockTraits::child_end(H); I != E; ++I) if (contains(*I)) ++NumBackEdges; @@ -381,6 +377,20 @@ public: /// isSafeToClone - Return true if the loop body is safe to clone in practice. bool isSafeToClone() const; + /// Returns true if the loop is annotated parallel. + /// + /// A parallel loop can be assumed to not contain any dependencies between + /// iterations by the compiler. That is, any loop-carried dependency checking + /// can be skipped completely when parallelizing the loop on the target + /// machine. Thus, if the parallel loop information originates from the + /// programmer, e.g. 
via the OpenMP parallel for pragma, it is the + /// programmer's responsibility to ensure there are no loop-carried + /// dependencies. The final execution order of the instructions across + /// iterations is not guaranteed, thus, the end result might or might not + /// implement actual concurrent execution of instructions across multiple + /// iterations. + bool isAnnotatedParallel() const; + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool hasDedicatedExits() const; diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 3bb96f96bf52..5485f3c0c04c 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -12,11 +12,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LOOP_INFO_IMPL_H -#define LLVM_ANALYSIS_LOOP_INFO_IMPL_H +#ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H +#define LLVM_ANALYSIS_LOOPINFOIMPL_H -#include "llvm/Analysis/LoopInfo.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/LoopInfo.h" namespace llvm { diff --git a/include/llvm/Analysis/LoopIterator.h b/include/llvm/Analysis/LoopIterator.h index 68f25f74bc28..e3dd96354c65 100644 --- a/include/llvm/Analysis/LoopIterator.h +++ b/include/llvm/Analysis/LoopIterator.h @@ -21,10 +21,9 @@ // reachable from the loop header. 
//===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LOOP_ITERATOR_H -#define LLVM_ANALYSIS_LOOP_ITERATOR_H +#ifndef LLVM_ANALYSIS_LOOPITERATOR_H +#define LLVM_ANALYSIS_LOOPITERATOR_H -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index e6ed9bccee31..5767c1916b39 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -12,13 +12,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LOOP_PASS_H -#define LLVM_LOOP_PASS_H +#ifndef LLVM_ANALYSIS_LOOPPASS_H +#define LLVM_ANALYSIS_LOOPPASS_H #include "llvm/Analysis/LoopInfo.h" #include "llvm/Pass.h" #include "llvm/PassManagers.h" -#include "llvm/Function.h" #include namespace llvm { @@ -39,6 +38,9 @@ public: // whatever action is necessary for the specified Loop. virtual bool runOnLoop(Loop *L, LPPassManager &LPM) = 0; + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + // Initialization and finalization hooks. 
virtual bool doInitialization(Loop *L, LPPassManager &LPM) { return false; diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index a842898e4100..63262eb9a364 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -15,12 +15,12 @@ #ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H #define LLVM_ANALYSIS_MEMORYBUILTINS_H -#include "llvm/IRBuilder.h" -#include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Support/ValueHandle.h" @@ -138,12 +138,22 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { // /// \brief Compute the size of the object pointed by Ptr. Returns true and the -/// object size in Size if successful, and false otherwise. +/// object size in Size if successful, and false otherwise. In this context, by +/// object we mean the region of memory starting at Ptr to the end of the +/// underlying object pointed to by Ptr. /// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, /// byval arguments, and global variables. bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, const TargetLibraryInfo *TLI, bool RoundToAlign = false); +/// \brief Compute the size of the underlying object pointed by Ptr. Returns +/// true and the object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. 
+bool getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size, + const DataLayout *TD, const TargetLibraryInfo *TLI, + bool RoundToAlign = false); + typedef std::pair SizeOffsetType; @@ -153,12 +163,14 @@ typedef std::pair SizeOffsetType; class ObjectSizeOffsetVisitor : public InstVisitor { + typedef DenseMap CacheMapTy; + const DataLayout *TD; const TargetLibraryInfo *TLI; bool RoundToAlign; unsigned IntTyBits; APInt Zero; - SmallPtrSet SeenInsts; + CacheMapTy CacheMap; APInt align(APInt Size, uint64_t Align); @@ -191,6 +203,7 @@ public: SizeOffsetType visitExtractElementInst(ExtractElementInst &I); SizeOffsetType visitExtractValueInst(ExtractValueInst &I); SizeOffsetType visitGEPOperator(GEPOperator &GEP); + SizeOffsetType visitGlobalAlias(GlobalAlias &GA); SizeOffsetType visitGlobalVariable(GlobalVariable &GV); SizeOffsetType visitIntToPtrInst(IntToPtrInst&); SizeOffsetType visitLoadInst(LoadInst &I); diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index a715eaeee11c..47afd1b77b0e 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_MEMORY_DEPENDENCE_H -#define LLVM_ANALYSIS_MEMORY_DEPENDENCE_H +#ifndef LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H +#define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H -#include "llvm/BasicBlock.h" -#include "llvm/Pass.h" -#include "llvm/Support/ValueHandle.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Pass.h" +#include "llvm/Support/ValueHandle.h" namespace llvm { class Function; @@ -34,14 +34,14 @@ namespace llvm { class 
PredIteratorCache; class DominatorTree; class PHITransAddr; - + /// MemDepResult - A memory dependence query can return one of three different /// answers, described below. class MemDepResult { enum DepType { /// Invalid - Clients of MemDep never see this. Invalid = 0, - + /// Clobber - This is a dependence on the specified instruction which /// clobbers the desired value. The pointer member of the MemDepResult /// pair holds the instruction that clobbers the memory. For example, @@ -72,7 +72,7 @@ namespace llvm { /// and no intervening clobbers. No validation is done that the /// operands to the calls are the same. Def, - + /// Other - This marker indicates that the query has no known dependency /// in the specified block. More detailed state info is encoded in the /// upper part of the pair (i.e. the Instruction*) @@ -99,7 +99,7 @@ namespace llvm { explicit MemDepResult(PairTy V) : Value(V) {} public: MemDepResult() : Value(0, Invalid) {} - + /// get methods: These are static ctor methods for creating various /// MemDepResult kinds. static MemDepResult getDef(Instruction *Inst) { @@ -130,7 +130,7 @@ namespace llvm { /// isDef - Return true if this MemDepResult represents a query that is /// an instruction definition dependency. bool isDef() const { return Value.getInt() == Def; } - + /// isNonLocal - Return true if this MemDepResult represents a query that /// is transparent to the start of the block, but where a non-local hasn't /// been done. @@ -145,7 +145,7 @@ namespace llvm { return Value.getInt() == Other && Value.getPointer() == reinterpret_cast(NonFuncLocal); } - + /// isUnknown - Return true if this MemDepResult represents a query which /// cannot and/or will not be computed. 
bool isUnknown() const { @@ -159,7 +159,7 @@ namespace llvm { if (Value.getInt() == Other) return NULL; return Value.getPointer(); } - + bool operator==(const MemDepResult &M) const { return Value == M.Value; } bool operator!=(const MemDepResult &M) const { return Value != M.Value; } bool operator<(const MemDepResult &M) const { return Value < M.Value; } @@ -175,11 +175,11 @@ namespace llvm { /// In a default-constructed MemDepResult object, the type will be Dirty /// and the instruction pointer will be null. /// - + /// isDirty - Return true if this is a MemDepResult in its dirty/invalid. /// state. bool isDirty() const { return Value.getInt() == Invalid; } - + static MemDepResult getDirty(Instruction *Inst) { return MemDepResult(PairTy(Inst, Invalid)); } @@ -199,16 +199,16 @@ namespace llvm { // BB is the sort key, it can't be changed. BasicBlock *getBB() const { return BB; } - + void setResult(const MemDepResult &R) { Result = R; } const MemDepResult &getResult() const { return Result; } - + bool operator<(const NonLocalDepEntry &RHS) const { return BB < RHS.BB; } }; - + /// NonLocalDepResult - This is a result from a NonLocal dependence query. /// For each BasicBlock (the BB entry) it keeps a MemDepResult and the /// (potentially phi translated) address that was live in the block. @@ -218,17 +218,17 @@ namespace llvm { public: NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address) : Entry(bb, result), Address(address) {} - + // BB is the sort key, it can't be changed. BasicBlock *getBB() const { return Entry.getBB(); } - + void setResult(const MemDepResult &R, Value *Addr) { Entry.setResult(R); Address = Addr; } - + const MemDepResult &getResult() const { return Entry.getResult(); } - + /// getAddress - Return the address of this pointer in this block. This can /// be different than the address queried for the non-local result because /// of phi translation. 
This returns null if the address was not available @@ -238,7 +238,7 @@ namespace llvm { /// The address is always null for a non-local 'call' dependence. Value *getAddress() const { return Address; } }; - + /// MemoryDependenceAnalysis - This is an analysis that determines, for a /// given memory operation, what preceding memory operations it depends on. /// It builds on alias analysis information, and tries to provide a lazy, @@ -297,30 +297,30 @@ namespace llvm { CachedNonLocalPointerInfo NonLocalPointerDeps; // A map from instructions to their non-local pointer dependencies. - typedef DenseMap > ReverseNonLocalPtrDepTy; ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; - + /// PerInstNLInfo - This is the instruction we keep for each cached access /// that we have for an instruction. The pointer is an owning pointer and /// the bool indicates whether we have any dirty bits in the set. typedef std::pair PerInstNLInfo; - + // A map from instructions to their non-local dependencies. typedef DenseMap NonLocalDepMapType; - + NonLocalDepMapType NonLocalDeps; - + // A reverse mapping from dependencies to the dependees. This is // used when removing instructions to keep the cache coherent. typedef DenseMap > ReverseDepMapType; ReverseDepMapType ReverseLocalDeps; - + // A reverse mapping from dependencies to the non-local dependees. ReverseDepMapType ReverseNonLocalDeps; - + /// Current AA implementation, just a cache. AliasAnalysis *AA; DataLayout *TD; @@ -333,15 +333,15 @@ namespace llvm { /// Pass Implementation stuff. This doesn't do any analysis eagerly. bool runOnFunction(Function &); - + /// Clean up memory in between runs void releaseMemory(); - + /// getAnalysisUsage - Does not modify anything. It uses Value Numbering /// and Alias Analysis. /// virtual void getAnalysisUsage(AnalysisUsage &AU) const; - + /// getDependency - Return the instruction on which a memory operation /// depends. See the class comment for more details. 
It is illegal to call /// this on non-memory instructions. @@ -360,8 +360,8 @@ namespace llvm { /// removed. Clients must copy this data if they want it around longer than /// that. const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS); - - + + /// getNonLocalPointerDependency - Perform a full dependency query for an /// access to the specified (non-volatile) memory location, returning the /// set of instructions that either define or clobber the value. @@ -374,7 +374,7 @@ namespace llvm { /// removeInstruction - Remove an instruction from the dependence analysis, /// updating the dependence of instructions that previously depended on it. void removeInstruction(Instruction *InstToRemove); - + /// invalidateCachedPointerInfo - This method is used to invalidate cached /// information about the specified pointer, because it may be too /// conservative in memdep. This is an optional call that can be used when @@ -387,20 +387,23 @@ namespace llvm { /// This needs to be done when the CFG changes, e.g., due to splitting /// critical edges. void invalidateCachedPredecessors(); - + /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases /// with read-only operations. If isLoad is false, this routine ignores - /// may-aliases with reads from read-only locations. + /// may-aliases with reads from read-only locations. If possible, pass + /// the query instruction as well; this function may take advantage of + /// the metadata annotated to the query instruction to refine the result. /// /// Note that this is an uncached query, and thus may be inefficient. 
/// MemDepResult getPointerDependencyFrom(const AliasAnalysis::Location &Loc, - bool isLoad, + bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB); - - + BasicBlock *BB, + Instruction *QueryInst = 0); + + /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that /// looks at a memory location for a load (specified by MemLocBase, Offs, /// and Size) and compares it against a load. If the specified load could @@ -413,7 +416,7 @@ namespace llvm { unsigned MemLocSize, const LoadInst *LI, const DataLayout &TD); - + private: MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall, BasicBlock::iterator ScanIt, @@ -430,11 +433,11 @@ namespace llvm { unsigned NumSortedEntries); void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); - + /// verifyRemoved - Verify that the specified instruction does not occur /// in our internal data structures. void verifyRemoved(Instruction *Inst) const; - + }; } // End llvm namespace diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h index 5a77fcebafa0..d7a3dd889a1b 100644 --- a/include/llvm/Analysis/PHITransAddr.h +++ b/include/llvm/Analysis/PHITransAddr.h @@ -14,8 +14,8 @@ #ifndef LLVM_ANALYSIS_PHITRANSADDR_H #define LLVM_ANALYSIS_PHITRANSADDR_H -#include "llvm/Instruction.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instruction.h" namespace llvm { class DominatorTree; diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index 27726f49bcce..ae117135db93 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -198,9 +198,6 @@ namespace llvm { // analyze. 
FunctionPass *createInstCountPass(); - // print debug info intrinsics in human readable form - FunctionPass *createDbgInfoPrinterPass(); - //===--------------------------------------------------------------------===// // // createRegionInfoPass - This pass finds all single entry single exit regions diff --git a/include/llvm/Analysis/PathNumbering.h b/include/llvm/Analysis/PathNumbering.h index 7025e28484cc..400a37d8293f 100644 --- a/include/llvm/Analysis/PathNumbering.h +++ b/include/llvm/Analysis/PathNumbering.h @@ -23,14 +23,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_PATH_NUMBERING_H -#define LLVM_PATH_NUMBERING_H +#ifndef LLVM_ANALYSIS_PATHNUMBERING_H +#define LLVM_ANALYSIS_PATHNUMBERING_H -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/Analysis/ProfileInfoTypes.h" #include #include #include diff --git a/include/llvm/Analysis/PathProfileInfo.h b/include/llvm/Analysis/PathProfileInfo.h index cef6d2d2a6c8..4fce16ef0d56 100644 --- a/include/llvm/Analysis/PathProfileInfo.h +++ b/include/llvm/Analysis/PathProfileInfo.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_PATHPROFILEINFO_H -#define LLVM_PATHPROFILEINFO_H +#ifndef LLVM_ANALYSIS_PATHPROFILEINFO_H +#define LLVM_ANALYSIS_PATHPROFILEINFO_H -#include "llvm/BasicBlock.h" #include "llvm/Analysis/PathNumbering.h" +#include "llvm/IR/BasicBlock.h" namespace llvm { diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h index 0eddb9105e60..d082297454a1 100644 --- a/include/llvm/Analysis/PostDominators.h +++ b/include/llvm/Analysis/PostDominators.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef 
LLVM_ANALYSIS_POST_DOMINATORS_H -#define LLVM_ANALYSIS_POST_DOMINATORS_H +#ifndef LLVM_ANALYSIS_POSTDOMINATORS_H +#define LLVM_ANALYSIS_POSTDOMINATORS_H #include "llvm/Analysis/Dominators.h" diff --git a/include/llvm/Analysis/ProfileDataLoader.h b/include/llvm/Analysis/ProfileDataLoader.h index 9efbafcef41c..90097f79951d 100644 --- a/include/llvm/Analysis/ProfileDataLoader.h +++ b/include/llvm/Analysis/ProfileDataLoader.h @@ -16,6 +16,7 @@ #ifndef LLVM_ANALYSIS_PROFILEDATALOADER_H #define LLVM_ANALYSIS_PROFILEDATALOADER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" diff --git a/include/llvm/Analysis/ProfileInfo.h b/include/llvm/Analysis/ProfileInfo.h index 6c2e2732d344..5d17fa1220e1 100644 --- a/include/llvm/Analysis/ProfileInfo.h +++ b/include/llvm/Analysis/ProfileInfo.h @@ -26,9 +26,9 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include +#include namespace llvm { class Pass; diff --git a/include/llvm/Analysis/ProfileInfoLoader.h b/include/llvm/Analysis/ProfileInfoLoader.h index dcf3b38ddcd5..e0f49f3179bc 100644 --- a/include/llvm/Analysis/ProfileInfoLoader.h +++ b/include/llvm/Analysis/ProfileInfoLoader.h @@ -16,9 +16,9 @@ #ifndef LLVM_ANALYSIS_PROFILEINFOLOADER_H #define LLVM_ANALYSIS_PROFILEINFOLOADER_H -#include #include #include +#include namespace llvm { diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h new file mode 100644 index 000000000000..1802fe88e356 --- /dev/null +++ b/include/llvm/Analysis/PtrUseVisitor.h @@ -0,0 +1,285 @@ +//===- PtrUseVisitor.h - InstVisitors over a pointers uses ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides a collection of visitors which walk the (instruction) +/// uses of a pointer. These visitors all provide the same essential behavior +/// as an InstVisitor with similar template-based flexibility and +/// implementation strategies. +/// +/// These can be used, for example, to quickly analyze the uses of an alloca, +/// global variable, or function argument. +/// +/// FIXME: Provide a variant which doesn't track offsets and is cheaper. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PTRUSEVISITOR_H +#define LLVM_ANALYSIS_PTRUSEVISITOR_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InstVisitor.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +namespace detail { +/// \brief Implementation of non-dependent functionality for \c PtrUseVisitor. +/// +/// See \c PtrUseVisitor for the public interface and detailed comments about +/// usage. This class is just a helper base class which is not templated and +/// contains all common code to be shared between different instantiations of +/// PtrUseVisitor. +class PtrUseVisitorBase { +public: + /// \brief This class provides information about the result of a visit. + /// + /// After walking all the users (recursively) of a pointer, the basic + /// infrastructure records some commonly useful information such as escape + /// analysis and whether the visit completed or aborted early. + class PtrInfo { + public: + PtrInfo() : AbortedInfo(0, false), EscapedInfo(0, false) {} + + /// \brief Reset the pointer info, clearing all state. 
+ void reset() { + AbortedInfo.setPointer(0); + AbortedInfo.setInt(false); + EscapedInfo.setPointer(0); + EscapedInfo.setInt(false); + } + + /// \brief Did we abort the visit early? + bool isAborted() const { return AbortedInfo.getInt(); } + + /// \brief Is the pointer escaped at some point? + bool isEscaped() const { return EscapedInfo.getInt(); } + + /// \brief Get the instruction causing the visit to abort. + /// \returns a pointer to the instruction causing the abort if one is + /// available; otherwise returns null. + Instruction *getAbortingInst() const { return AbortedInfo.getPointer(); } + + /// \brief Get the instruction causing the pointer to escape. + /// \returns a pointer to the instruction which escapes the pointer if one + /// is available; otherwise returns null. + Instruction *getEscapingInst() const { return EscapedInfo.getPointer(); } + + /// \brief Mark the visit as aborted. Intended for use in a void return. + /// \param I The instruction which caused the visit to abort, if available. + void setAborted(Instruction *I = 0) { + AbortedInfo.setInt(true); + AbortedInfo.setPointer(I); + } + + /// \brief Mark the pointer as escaped. Intended for use in a void return. + /// \param I The instruction which escapes the pointer, if available. + void setEscaped(Instruction *I = 0) { + EscapedInfo.setInt(true); + EscapedInfo.setPointer(I); + } + + /// \brief Mark the pointer as escaped, and the visit as aborted. Intended + /// for use in a void return. + /// \param I The instruction which both escapes the pointer and aborts the + /// visit, if available. + void setEscapedAndAborted(Instruction *I = 0) { + setEscaped(I); + setAborted(I); + } + + private: + PointerIntPair AbortedInfo, EscapedInfo; + }; + +protected: + const DataLayout &DL; + + /// \name Visitation infrastructure + /// @{ + + /// \brief The info collected about the pointer being visited thus far. + PtrInfo PI; + + /// \brief A struct of the data needed to visit a particular use. 
+ /// + /// This is used to maintain a worklist of to-visit uses. This is used to + /// make the visit be iterative rather than recursive. + struct UseToVisit { + typedef PointerIntPair UseAndIsOffsetKnownPair; + UseAndIsOffsetKnownPair UseAndIsOffsetKnown; + APInt Offset; + }; + + /// \brief The worklist of to-visit uses. + SmallVector Worklist; + + /// \brief A set of visited uses to break cycles in unreachable code. + SmallPtrSet VisitedUses; + + /// @} + + + /// \name Per-visit state + /// This state is reset for each instruction visited. + /// @{ + + /// \brief The use currently being visited. + Use *U; + + /// \brief True if we have a known constant offset for the use currently + /// being visited. + bool IsOffsetKnown; + + /// \brief The constant offset of the use if that is known. + APInt Offset; + + /// @} + + + /// Note that the constructor is protected because this class must be a base + /// class, we can't create instances directly of this class. + PtrUseVisitorBase(const DataLayout &DL) : DL(DL) {} + + /// \brief Enqueue the users of this instruction in the visit worklist. + /// + /// This will visit the users with the same offset of the current visit + /// (including an unknown offset if that is the current state). + void enqueueUsers(Instruction &I); + + /// \brief Walk the operands of a GEP and adjust the offset as appropriate. + /// + /// This routine does the heavy lifting of the pointer walk by computing + /// offsets and looking through GEPs. + bool adjustOffsetForGEP(GetElementPtrInst &GEPI); +}; +} // end namespace detail + +/// \brief A base class for visitors over the uses of a pointer value. +/// +/// Once constructed, a user can call \c visit on a pointer value, and this +/// will walk its uses and visit each instruction using an InstVisitor. It also +/// provides visit methods which will recurse through any pointer-to-pointer +/// transformations such as GEPs and bitcasts.
+/// +/// During the visit, the current Use* being visited is available to the +/// subclass, as well as the current offset from the original base pointer if +/// known. +/// +/// The recursive visit of uses is accomplished with a worklist, so the only +/// ordering guarantee is that an instruction is visited before any uses of it +/// are visited. Note that this does *not* mean before any of its users are +/// visited! This is because users can be visited multiple times due to +/// multiple, different uses of pointers derived from the same base. +/// +/// A particular Use will only be visited once, but a User may be visited +/// multiple times, once per Use. These visits may notably have different +/// offsets. +/// +/// All visit methods on the underlying InstVisitor return a boolean. This +/// return short-circuits the visit, stopping it immediately. +/// +/// FIXME: Generalize this for all values rather than just instructions. +template +class PtrUseVisitor : protected InstVisitor, + public detail::PtrUseVisitorBase { + friend class InstVisitor; + typedef InstVisitor Base; + +public: + PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {} + + /// \brief Recursively visit the uses of the given pointer. + /// \returns An info struct about the pointer. See \c PtrInfo for details. + PtrInfo visitPtr(Instruction &I) { + // This must be a pointer type. Get an integer type suitable to hold + // offsets on this pointer. + // FIXME: Support a vector of pointers. + assert(I.getType()->isPointerTy()); + IntegerType *IntPtrTy = cast(DL.getIntPtrType(I.getType())); + IsOffsetKnown = true; + Offset = APInt(IntPtrTy->getBitWidth(), 0); + PI.reset(); + + // Enqueue the uses of this pointer. + enqueueUsers(I); + + // Visit all the uses off the worklist until it is empty.
+ while (!Worklist.empty()) { + UseToVisit ToVisit = Worklist.pop_back_val(); + U = ToVisit.UseAndIsOffsetKnown.getPointer(); + IsOffsetKnown = ToVisit.UseAndIsOffsetKnown.getInt(); + if (IsOffsetKnown) + Offset = llvm_move(ToVisit.Offset); + + Instruction *I = cast(U->getUser()); + static_cast(this)->visit(I); + if (PI.isAborted()) + break; + } + return PI; + } + +protected: + void visitStoreInst(StoreInst &SI) { + if (SI.getValueOperand() == U->get()) + PI.setEscaped(&SI); + } + + void visitBitCastInst(BitCastInst &BC) { + enqueueUsers(BC); + } + + void visitPtrToIntInst(PtrToIntInst &I) { + PI.setEscaped(&I); + } + + void visitGetElementPtrInst(GetElementPtrInst &GEPI) { + if (GEPI.use_empty()) + return; + + // If we can't walk the GEP, clear the offset. + if (!adjustOffsetForGEP(GEPI)) { + IsOffsetKnown = false; + Offset = APInt(); + } + + // Enqueue the users now that the offset has been adjusted. + enqueueUsers(GEPI); + } + + // No-op intrinsics which we know don't escape the pointer to logic in + // some other function. + void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {} + void visitMemIntrinsic(MemIntrinsic &I) {} + void visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + default: + return Base::visitIntrinsicInst(II); + + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return; // No-op intrinsics. + } + } + + // Generically, arguments to calls and invokes escape the pointer to some + // other function. Mark that.
+ void visitCallSite(CallSite CS) { + PI.setEscaped(CS.getInstruction()); + Base::visitCallSite(CS); + } +}; + +} + +#endif diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index 48d7ee6b5476..69cc29381136 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -24,8 +24,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_REGION_INFO_H -#define LLVM_ANALYSIS_REGION_INFO_H +#ifndef LLVM_ANALYSIS_REGIONINFO_H +#define LLVM_ANALYSIS_REGIONINFO_H #include "llvm/ADT/PointerIntPair.h" #include "llvm/Analysis/DominanceFrontier.h" diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h index 7adc71ca82ac..8fd42637276e 100644 --- a/include/llvm/Analysis/RegionIterator.h +++ b/include/llvm/Analysis/RegionIterator.h @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// // This file defines the iterators to iterate over the elements of a Region. 
//===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_REGION_ITERATOR_H -#define LLVM_ANALYSIS_REGION_ITERATOR_H +#ifndef LLVM_ANALYSIS_REGIONITERATOR_H +#define LLVM_ANALYSIS_REGIONITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/raw_ostream.h" diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h index 68f12012bcd1..0690ac5e34a7 100644 --- a/include/llvm/Analysis/RegionPass.h +++ b/include/llvm/Analysis/RegionPass.h @@ -13,15 +13,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_REGION_PASS_H -#define LLVM_REGION_PASS_H +#ifndef LLVM_ANALYSIS_REGIONPASS_H +#define LLVM_ANALYSIS_REGIONPASS_H #include "llvm/Analysis/RegionInfo.h" - +#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/PassManagers.h" -#include "llvm/Function.h" - #include namespace llvm { @@ -59,6 +57,9 @@ public: /// @return The pass to print the LLVM IR in the region. 
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + virtual bool doInitialization(Region *R, RGPassManager &RGM) { return false; } virtual bool doFinalization() { return false; } //@} diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 235adca02175..306549fba46c 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -21,16 +21,16 @@ #ifndef LLVM_ANALYSIS_SCALAREVOLUTION_H #define LLVM_ANALYSIS_SCALAREVOLUTION_H +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Function.h" -#include "llvm/Operator.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ConstantRange.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ValueHandle.h" #include namespace llvm { @@ -338,6 +338,10 @@ namespace llvm { /// getMax - Get the max backedge taken count for the loop. const SCEV *getMax(ScalarEvolution *SE) const; + /// Return true if any backedge taken count expressions refer to the given + /// subexpression. + bool hasOperand(const SCEV *S, ScalarEvolution *SE) const; + /// clear - Invalidate this result and free associated memory. void clear(); }; @@ -831,7 +835,7 @@ namespace llvm { /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with /// predicate Pred. Return true iff any changes were made. If the - /// operands are provably equal or inequal, LHS and RHS are set to + /// operands are provably equal or unequal, LHS and RHS are set to /// the same value and Pred is set to either ICMP_EQ or ICMP_NE. 
/// bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 3f8f149cb420..00779fc329b1 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -11,18 +11,18 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H -#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H -#include "llvm/IRBuilder.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Support/ValueHandle.h" #include namespace llvm { - class TargetLowering; + class TargetTransformInfo; /// Return true if the given expression is safe to expand in the sense that /// all materialized values are safe to speculate. @@ -40,8 +40,10 @@ namespace llvm { // New instructions receive a name to identifies them with the current pass. const char* IVName; - std::map, AssertingVH > + // InsertedExpressions caches Values for reuse, so must track RAUW. + std::map, TrackingVH > InsertedExpressions; + // InsertedValues only flags inserted instructions so needs no RAUW. std::set > InsertedValues; std::set > InsertedPostIncValues; @@ -129,7 +131,7 @@ namespace llvm { /// representative. Return the number of phis eliminated. unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl &DeadInsts, - const TargetLowering *TLI = NULL); + const TargetTransformInfo *TTI = NULL); /// expandCodeFor - Insert code to directly compute the specified SCEV /// expression into the program. 
The inserted code is inserted into the diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index 54db7d6bcf0d..eac91131ad53 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H -#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/ErrorHandling.h" namespace llvm { @@ -548,6 +548,151 @@ namespace llvm { SCEVTraversal T(Visitor); T.visitAll(Root); } + + /// The SCEVRewriter takes a scalar evolution expression and copies all its + /// components. The result after a rewrite is an identical SCEV. 
+ struct SCEVRewriter + : public SCEVVisitor { + public: + SCEVRewriter(ScalarEvolution &S) : SE(S) {} + + virtual ~SCEVRewriter() {} + + virtual const SCEV *visitConstant(const SCEVConstant *Constant) { + return Constant; + } + + virtual const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + const SCEV *Operand = visit(Expr->getOperand()); + return SE.getTruncateExpr(Operand, Expr->getType()); + } + + virtual const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + const SCEV *Operand = visit(Expr->getOperand()); + return SE.getZeroExtendExpr(Operand, Expr->getType()); + } + + virtual const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + const SCEV *Operand = visit(Expr->getOperand()); + return SE.getSignExtendExpr(Operand, Expr->getType()); + } + + virtual const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(visit(Expr->getOperand(i))); + return SE.getAddExpr(Operands); + } + + virtual const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(visit(Expr->getOperand(i))); + return SE.getMulExpr(Operands); + } + + virtual const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS())); + } + + virtual const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(visit(Expr->getOperand(i))); + return SE.getAddRecExpr(Operands, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + virtual const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(visit(Expr->getOperand(i))); + return SE.getSMaxExpr(Operands); + } + + virtual const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + 
SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(visit(Expr->getOperand(i))); + return SE.getUMaxExpr(Operands); + } + + virtual const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return Expr; + } + + virtual const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + + protected: + ScalarEvolution &SE; + }; + + typedef DenseMap ValueToValueMap; + + /// The SCEVParameterRewriter takes a scalar evolution expression and updates + /// the SCEVUnknown components following the Map (Value -> Value). + struct SCEVParameterRewriter: public SCEVRewriter { + public: + static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, + ValueToValueMap &Map) { + SCEVParameterRewriter Rewriter(SE, Map); + return Rewriter.visit(Scev); + } + SCEVParameterRewriter(ScalarEvolution &S, ValueToValueMap &M) + : SCEVRewriter(S), Map(M) {} + + virtual const SCEV *visitUnknown(const SCEVUnknown *Expr) { + Value *V = Expr->getValue(); + if (Map.count(V)) + return SE.getUnknown(Map[V]); + return Expr; + } + + private: + ValueToValueMap &Map; + }; + + typedef DenseMap LoopToScevMapT; + + /// The SCEVApplyRewriter takes a scalar evolution expression and applies + /// the Map (Loop -> SCEV) to all AddRecExprs.
+ struct SCEVApplyRewriter: public SCEVRewriter { + public: + static const SCEV *rewrite(const SCEV *Scev, LoopToScevMapT &Map, + ScalarEvolution &SE) { + SCEVApplyRewriter Rewriter(SE, Map); + return Rewriter.visit(Scev); + } + SCEVApplyRewriter(ScalarEvolution &S, LoopToScevMapT &M) + : SCEVRewriter(S), Map(M) {} + + virtual const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(visit(Expr->getOperand(i))); + + const Loop *L = Expr->getLoop(); + const SCEV *Res = SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags()); + + if (0 == Map.count(L)) + return Res; + + const SCEVAddRecExpr *Rec = (const SCEVAddRecExpr *) Res; + return Rec->evaluateAtIteration(Map[L], SE); + } + + private: + LoopToScevMapT &Map; + }; + +/// Applies the Map (Loop -> SCEV) to the given Scev. +static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map, + ScalarEvolution &SE) { + return SCEVApplyRewriter::rewrite(Scev, Map, SE); +} + } #endif diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h index 342e5937891a..7c6423a21cfa 100644 --- a/include/llvm/Analysis/ScalarEvolutionNormalization.h +++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -33,8 +33,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H -#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H #include "llvm/ADT/SmallPtrSet.h" diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index b758eca42e78..76c8ccf59c2b 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -12,13 +12,13 @@ //
//===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_SPARSE_PROPAGATION_H -#define LLVM_ANALYSIS_SPARSE_PROPAGATION_H +#ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H +#define LLVM_ANALYSIS_SPARSEPROPAGATION_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include #include +#include namespace llvm { class Value; @@ -203,4 +203,4 @@ private: } // end namespace llvm -#endif // LLVM_ANALYSIS_SPARSE_PROPAGATION_H +#endif // LLVM_ANALYSIS_SPARSEPROPAGATION_H diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h new file mode 100644 index 000000000000..a9d6725d86b0 --- /dev/null +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -0,0 +1,349 @@ +//===- llvm/Analysis/TargetTransformInfo.h ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass exposes codegen information to IR-level passes. Every +// transformation that uses codegen information is broken into three parts: +// 1. The IR-level analysis pass. +// 2. The IR-level transformation interface which provides the needed +// information. +// 3. Codegen-level implementation which uses target-specific hooks. +// +// This file defines #2, which is the interface that IR-level transformations +// use for querying the codegen. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H +#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H + +#include "llvm/IR/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class GlobalValue; +class Type; +class User; +class Value; + +/// TargetTransformInfo - This pass provides access to the codegen +/// interfaces that are needed for IR-level transformations. +class TargetTransformInfo { +protected: + /// \brief The TTI instance one level down the stack. + /// + /// This is used to implement the default behavior all of the methods which + /// is to delegate up through the stack of TTIs until one can answer the + /// query. + TargetTransformInfo *PrevTTI; + + /// \brief The top of the stack of TTI analyses available. + /// + /// This is a convenience routine maintained as TTI analyses become available + /// that complements the PrevTTI delegation chain. When one part of an + /// analysis pass wants to query another part of the analysis pass it can use + /// this to start back at the top of the stack. + TargetTransformInfo *TopTTI; + + /// All pass subclasses must in their initializePass routine call + /// pushTTIStack with themselves to update the pointers tracking the previous + /// TTI instance in the analysis group's stack, and the top of the analysis + /// group's stack. + void pushTTIStack(Pass *P); + + /// All pass subclasses must in their finalizePass routine call popTTIStack + /// to update the pointers tracking the previous TTI instance in the analysis + /// group's stack, and the top of the analysis group's stack. + void popTTIStack(); + + /// All pass subclasses must call TargetTransformInfo::getAnalysisUsage. + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + +public: + /// This class is intended to be subclassed by real implementations. 
+ virtual ~TargetTransformInfo() = 0; + + /// \name Generic Target Information + /// @{ + + /// \brief Underlying constants for 'cost' values in this interface. + /// + /// Many APIs in this interface return a cost. This enum defines the + /// fundamental values that should be used to interpret (and produce) those + /// costs. The costs are returned as an unsigned rather than a member of this + /// enumeration because it is expected that the cost of one IR instruction + /// may have a multiplicative factor to it or otherwise won't fit directly + /// into the enum. Moreover, it is common to sum or average costs which works + /// better as simple integral values. Thus this enum only provides constants. + /// + /// Note that these costs should usually reflect the intersection of code-size + /// cost and execution cost. A free instruction is typically one that folds + /// into another instruction. For example, reg-to-reg moves can often be + /// skipped by renaming the registers in the CPU, but they still are encoded + /// and thus wouldn't be considered 'free' here. + enum TargetCostConstants { + TCC_Free = 0, ///< Expected to fold away in lowering. + TCC_Basic = 1, ///< The cost of a typical 'add' instruction. + TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. + }; + + /// \brief Estimate the cost of a specific operation when lowered. + /// + /// Note that this is designed to work on an arbitrary synthetic opcode, and + /// thus work for hypothetical queries before an instruction has even been + /// formed. However, this does *not* work for GEPs, and must not be called + /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as + /// analyzing a GEP's cost requires more information. + /// + /// Typically only the result type is required, and the operand type can be + /// omitted. However, if the opcode is one of the cast instructions, the + /// operand type is required. 
+ /// + /// The returned cost is defined in terms of \c TargetCostConstants, see its + /// comments for a detailed explanation of the cost values. + virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy = 0) const; + + /// \brief Estimate the cost of a GEP operation when lowered. + /// + /// The contract for this function is the same as \c getOperationCost except + /// that it supports an interface that provides extra information specific to + /// the GEP operation. + virtual unsigned getGEPCost(const Value *Ptr, + ArrayRef Operands) const; + + /// \brief Estimate the cost of a function call when lowered. + /// + /// The contract for this is the same as \c getOperationCost except that it + /// supports an interface that provides extra information specific to call + /// instructions. + /// + /// This is the most basic query for estimating call cost: it only knows the + /// function type and (potentially) the number of arguments at the call site. + /// The latter is only interesting for varargs function types. + virtual unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const; + + /// \brief Estimate the cost of calling a specific function when lowered. + /// + /// This overload adds the ability to reason about the particular function + /// being called in the event it is a library call with special lowering. + virtual unsigned getCallCost(const Function *F, int NumArgs = -1) const; + + /// \brief Estimate the cost of calling a specific function when lowered. + /// + /// This overload allows specifying a set of candidate argument values. + virtual unsigned getCallCost(const Function *F, + ArrayRef Arguments) const; + + /// \brief Estimate the cost of an intrinsic when lowered. + /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) const; + + /// \brief Estimate the cost of an intrinsic when lowered. 
+ /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Arguments) const; + + /// \brief Estimate the cost of a given IR user when lowered. + /// + /// This can estimate the cost of either a ConstantExpr or Instruction when + /// lowered. It has two primary advantages over the \c getOperationCost and + /// \c getGEPCost above, and one significant disadvantage: it can only be + /// used when the IR construct has already been formed. + /// + /// The advantages are that it can inspect the SSA use graph to reason more + /// accurately about the cost. For example, all-constant-GEPs can often be + /// folded into a load or other instruction, but if they are used in some + /// other context they may not be folded. This routine can distinguish such + /// cases. + /// + /// The returned cost is defined in terms of \c TargetCostConstants, see its + /// comments for a detailed explanation of the cost values. + virtual unsigned getUserCost(const User *U) const; + + /// \brief Test whether calls to a function lower to actual program function + /// calls. + /// + /// The idea is to test whether the program is likely to require a 'call' + /// instruction or equivalent in order to call the given function. + /// + /// FIXME: It's not clear that this is a good or useful query API. Clients + /// should probably move to simpler cost metrics using the above. + /// Alternatively, we could split the cost interface into distinct code-size + /// and execution-speed costs. This would allow modelling the core of this + /// query more accurately as a call is a single small instruction, but + /// incurs significant execution cost. + virtual bool isLoweredToCall(const Function *F) const; + + /// @} + + /// \name Scalar Target Information + /// @{ + + /// \brief Flags indicating the kind of support for population count. 
+ /// + /// Compared to the SW implementation, HW support is supposed to + /// significantly boost the performance when the population is dense, and it + /// may or may not degrade performance if the population is sparse. A HW + /// support is considered as "Fast" if it can outperform, or is on a par + /// with, SW implementation when the population is sparse; otherwise, it is + /// considered as "Slow". + enum PopcntSupportKind { + PSK_Software, + PSK_SlowHardware, + PSK_FastHardware + }; + + /// isLegalAddImmediate - Return true if the specified immediate is legal + /// add immediate, that is the target has add instructions which can add + /// a register with the immediate without having to materialize the + /// immediate into a register. + virtual bool isLegalAddImmediate(int64_t Imm) const; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can compare + /// a register against the immediate without having to materialize the + /// immediate into a register. + virtual bool isLegalICmpImmediate(int64_t Imm) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented by + /// AM is legal for this target, for a load/store of the specified type. + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// TODO: Handle pre/postinc as well. + virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const; + + /// isTruncateFree - Return true if it's free to truncate a value of + /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in + /// register EAX to i16 by referencing its sub-register AX. + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; + + /// Is this type legal. 
+ virtual bool isTypeLegal(Type *Ty) const; + + /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes + virtual unsigned getJumpBufAlignment() const; + + /// getJumpBufSize - returns the target's jmp_buf size in bytes. + virtual unsigned getJumpBufSize() const; + + /// shouldBuildLookupTables - Return true if switches should be turned into + /// lookup tables for the target. + virtual bool shouldBuildLookupTables() const; + + /// getPopcntSupport - Return hardware support for population count. + virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; + + /// getIntImmCost - Return the expected cost of materializing the given + /// integer immediate of the specified type. + virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + + /// @} + + /// \name Vector Target Information + /// @{ + + /// \brief The various kinds of shuffle patterns for vector queries. + enum ShuffleKind { + SK_Broadcast, ///< Broadcast element 0 to all other elements. + SK_Reverse, ///< Reverse the order of the vector. + SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. + SK_ExtractSubvector ///< ExtractSubvector. Index indicates start offset. + }; + + /// \brief Additional information about an operand's possible values. + enum OperandValueKind { + OK_AnyValue, // Operand can have any value. + OK_UniformValue, // Operand is uniform (splat of a value). + OK_UniformConstantValue // Operand is uniform constant. + }; + + /// \return The number of scalar or vector registers that the target has. + /// If 'Vector' is true, it returns the number of vector registers. If it is + /// set to false, it returns the number of scalar registers. + virtual unsigned getNumberOfRegisters(bool Vector) const; + + /// \return The width of the largest scalar or vector register type. + virtual unsigned getRegisterBitWidth(bool Vector) const; + + /// \return The maximum unroll factor that the vectorizer should try to + /// perform for this target. 
This number depends on the level of parallelism + /// and the number of execution units in the CPU. + virtual unsigned getMaximumUnrollFactor() const; + + /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue) const; + + /// \return The cost of a shuffle instruction of kind Kind and of type Tp. + /// The index and subtype parameters are used by the subvector insertion and + /// extraction shuffle kinds. + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, + Type *SubTp = 0) const; + + /// \return The expected cost of cast instructions, such as bitcast, trunc, + /// zext, etc. + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + + /// \return The expected cost of control-flow related instructions such as + /// Phi, Ret, Br. + virtual unsigned getCFInstrCost(unsigned Opcode) const; + + /// \returns The expected cost of compare and select instructions. + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = 0) const; + + /// \return The expected cost of vector Insert and Extract. + /// Use -1 to indicate that there is no information on the index value. + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const; + + /// \return The cost of Load and Store instructions. + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + + /// \returns The cost of Intrinsic instructions. + virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Tys) const; + + /// \returns The number of pieces into which the provided type must be + /// split during legalization. Zero is returned when the answer is unknown. 
+ virtual unsigned getNumberOfParts(Type *Tp) const; + + /// \returns The cost of the address computation. For most targets this can be + /// merged into the instruction indexing mode. Some targets might want to + /// distinguish between address computation for memory operations on vector + /// types and scalar types. Such targets should override this function. + virtual unsigned getAddressComputationCost(Type *Ty) const; + + /// @} + + /// Analysis group identification. + static char ID; +}; + +/// \brief Create the base case instance of a pass in the TTI analysis group. +/// +/// This class provides the base case for the stack of TTI analyses. It doesn't +/// delegate to anything and uses the STTI and VTTI objects passed in to +/// satisfy the queries. +ImmutablePass *createNoTargetTransformInfoPass(); + +} // End llvm namespace + +#endif diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h index 99651e192d3b..bedd654c6521 100644 --- a/include/llvm/Analysis/Trace.h +++ b/include/llvm/Analysis/Trace.h @@ -18,8 +18,8 @@ #ifndef LLVM_ANALYSIS_TRACE_H #define LLVM_ANALYSIS_TRACE_H -#include #include +#include namespace llvm { class BasicBlock; @@ -116,4 +116,4 @@ public: } // end namespace llvm -#endif // TRACE_H +#endif // LLVM_ANALYSIS_TRACE_H diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index a85752446bb0..3775ec9f07aa 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -45,13 +45,12 @@ namespace llvm { void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout *TD = 0, unsigned Depth = 0); - /// isPowerOfTwo - Return true if the given value is known to have exactly one - /// bit set when defined. For vectors return true if every element is known to - /// be a power of two when defined. Supports values with integer or pointer - /// type and vectors of integers. 
If 'OrZero' is set then returns true if the - /// given value is either a power of two or zero. - bool isPowerOfTwo(Value *V, const DataLayout *TD = 0, bool OrZero = false, - unsigned Depth = 0); + /// isKnownToBeAPowerOfTwo - Return true if the given value is known to have + /// exactly one bit set when defined. For vectors return true if every + /// element is known to be a power of two when defined. Supports values with + /// integer or pointer type and vectors of integers. If 'OrZero' is set then + /// returns true if the given value is either a power of two or zero. + bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero = false, unsigned Depth = 0); /// isKnownNonZero - Return true if the given value is known to be non-zero /// when defined. For vectors return true if every element is known to be @@ -118,10 +117,10 @@ namespace llvm { /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &TD); + const DataLayout *TD); static inline const Value * GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset, - const DataLayout &TD) { + const DataLayout *TD) { return GetPointerBaseWithConstantOffset(const_cast(Ptr), Offset,TD); } @@ -184,6 +183,11 @@ namespace llvm { bool isSafeToSpeculativelyExecute(const Value *V, const DataLayout *TD = 0); + /// isKnownNonNull - Return true if this pointer couldn't possibly be null by + /// its definition. This returns true for allocas, non-extern-weak globals + /// and byval arguments. 
+ bool isKnownNonNull(const Value *V); + } // end namespace llvm #endif diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h deleted file mode 100644 index b1c22185191d..000000000000 --- a/include/llvm/Argument.h +++ /dev/null @@ -1,91 +0,0 @@ -//===-- llvm/Argument.h - Definition of the Argument class ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Argument class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARGUMENT_H -#define LLVM_ARGUMENT_H - -#include "llvm/Value.h" -#include "llvm/Attributes.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/Twine.h" - -namespace llvm { - -template - class SymbolTableListTraits; - -/// A class to represent an incoming formal argument to a Function. An argument -/// is a very simple Value. It is essentially a named (optional) type. When used -/// in the body of a function, it represents the value of the actual argument -/// the function was called with. -/// @brief LLVM Argument representation -class Argument : public Value, public ilist_node { - virtual void anchor(); - Function *Parent; - - friend class SymbolTableListTraits; - void setParent(Function *parent); - -public: - /// Argument ctor - If Function argument is specified, this argument is - /// inserted at the end of the argument list for the function. - /// - explicit Argument(Type *Ty, const Twine &Name = "", Function *F = 0); - - inline const Function *getParent() const { return Parent; } - inline Function *getParent() { return Parent; } - - /// getArgNo - Return the index of this formal argument in its containing - /// function. For example in "void foo(int a, float b)" a is 0 and b is 1. 
- unsigned getArgNo() const; - - /// hasByValAttr - Return true if this argument has the byval attribute on it - /// in its containing function. - bool hasByValAttr() const; - - /// getParamAlignment - If this is a byval argument, return its alignment. - unsigned getParamAlignment() const; - - /// hasNestAttr - Return true if this argument has the nest attribute on - /// it in its containing function. - bool hasNestAttr() const; - - /// hasNoAliasAttr - Return true if this argument has the noalias attribute on - /// it in its containing function. - bool hasNoAliasAttr() const; - - /// hasNoCaptureAttr - Return true if this argument has the nocapture - /// attribute on it in its containing function. - bool hasNoCaptureAttr() const; - - /// hasStructRetAttr - Return true if this argument has the sret attribute on - /// it in its containing function. - bool hasStructRetAttr() const; - - /// addAttr - Add a Attribute to an argument - void addAttr(Attributes); - - /// removeAttr - Remove a Attribute from an argument - void removeAttr(Attributes); - - /// classof - Methods for support type inquiry through isa, cast, and - /// dyn_cast: - /// - static inline bool classof(const Value *V) { - return V->getValueID() == ArgumentVal; - } -}; - -} // End llvm namespace - -#endif diff --git a/include/llvm/Assembly/PrintModulePass.h b/include/llvm/Assembly/PrintModulePass.h index 239fbcc0c8ca..02b9bd9be505 100644 --- a/include/llvm/Assembly/PrintModulePass.h +++ b/include/llvm/Assembly/PrintModulePass.h @@ -23,6 +23,7 @@ namespace llvm { class FunctionPass; class ModulePass; + class BasicBlockPass; class raw_ostream; /// createPrintModulePass - Create and return a pass that writes the @@ -37,6 +38,11 @@ namespace llvm { raw_ostream *OS, bool DeleteStream=false); + /// createPrintBasicBlockPass - Create and return a pass that writes the + /// BB to the specified raw_ostream. 
+ BasicBlockPass *createPrintBasicBlockPass(raw_ostream *OS, + bool DeleteStream=false, + const std::string &Banner = ""); } // End llvm namespace #endif diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h deleted file mode 100644 index a9c2d743ff4a..000000000000 --- a/include/llvm/Attributes.h +++ /dev/null @@ -1,431 +0,0 @@ -//===-- llvm/Attributes.h - Container for Attributes ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the simple types necessary to represent the -// attributes associated with functions and their calls. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ATTRIBUTES_H -#define LLVM_ATTRIBUTES_H - -#include "llvm/Support/MathExtras.h" -#include "llvm/ADT/ArrayRef.h" -#include -#include - -namespace llvm { - -class AttrBuilder; -class AttributesImpl; -class LLVMContext; -class Type; - -/// Attributes - A bitset of attributes. -class Attributes { -public: - /// Function parameters and results can have attributes to indicate how they - /// should be treated by optimizations and code generation. This enumeration - /// lists the attributes that can be associated with parameters, function - /// results or the function itself. - /// - /// Note that uwtable is about the ABI or the user mandating an entry in the - /// unwind table. The nounwind attribute is about an exception passing by the - /// function. - /// - /// In a theoretical system that uses tables for profiling and sjlj for - /// exceptions, they would be fully independent. In a normal system that uses - /// tables for both, the semantics are: - /// - /// nil = Needs an entry because an exception might pass by. 
- /// nounwind = No need for an entry - /// uwtable = Needs an entry because the ABI says so and because - /// an exception might pass by. - /// uwtable + nounwind = Needs an entry because the ABI says so. - - enum AttrVal { - // IR-Level Attributes - None, ///< No attributes have been set - AddressSafety, ///< Address safety checking is on. - Alignment, ///< Alignment of parameter (5 bits) - ///< stored as log2 of alignment with +1 bias - ///< 0 means unaligned different from align 1 - AlwaysInline, ///< inline=always - ByVal, ///< Pass structure by value - InlineHint, ///< Source said inlining was desirable - InReg, ///< Force argument to be passed in register - MinSize, ///< Function must be optimized for size first - Naked, ///< Naked function - Nest, ///< Nested function static chain - NoAlias, ///< Considered to not alias after call - NoCapture, ///< Function creates no aliases of pointer - NoImplicitFloat, ///< Disable implicit floating point insts - NoInline, ///< inline=never - NonLazyBind, ///< Function is called early and/or - ///< often, so lazy binding isn't worthwhile - NoRedZone, ///< Disable redzone - NoReturn, ///< Mark the function as not returning - NoUnwind, ///< Function doesn't unwind stack - OptimizeForSize, ///< opt_size - ReadNone, ///< Function does not access memory - ReadOnly, ///< Function only reads from memory - ReturnsTwice, ///< Function can return twice - SExt, ///< Sign extended before/after call - StackAlignment, ///< Alignment of stack for function (3 bits) - ///< stored as log2 of alignment with +1 bias 0 - ///< means unaligned (different from - ///< alignstack={1)) - StackProtect, ///< Stack protection. - StackProtectReq, ///< Stack protection required. 
- StructRet, ///< Hidden pointer to structure to return - UWTable, ///< Function must be in a unwind table - ZExt ///< Zero extended before/after call - }; -private: - AttributesImpl *Attrs; - Attributes(AttributesImpl *A) : Attrs(A) {} -public: - Attributes() : Attrs(0) {} - Attributes(const Attributes &A) : Attrs(A.Attrs) {} - Attributes &operator=(const Attributes &A) { - Attrs = A.Attrs; - return *this; - } - - /// get - Return a uniquified Attributes object. This takes the uniquified - /// value from the Builder and wraps it in the Attributes class. - static Attributes get(LLVMContext &Context, ArrayRef Vals); - static Attributes get(LLVMContext &Context, AttrBuilder &B); - - /// @brief Return true if the attribute is present. - bool hasAttribute(AttrVal Val) const; - - /// @brief Return true if attributes exist - bool hasAttributes() const; - - /// @brief Return true if the attributes are a non-null intersection. - bool hasAttributes(const Attributes &A) const; - - /// @brief Returns the alignment field of an attribute as a byte alignment - /// value. - unsigned getAlignment() const; - - /// @brief Returns the stack alignment field of an attribute as a byte - /// alignment value. - unsigned getStackAlignment() const; - - /// @brief Parameter attributes that do not apply to vararg call arguments. - bool hasIncompatibleWithVarArgsAttrs() const { - return hasAttribute(Attributes::StructRet); - } - - /// @brief Attributes that only apply to function parameters. - bool hasParameterOnlyAttrs() const { - return hasAttribute(Attributes::ByVal) || - hasAttribute(Attributes::Nest) || - hasAttribute(Attributes::StructRet) || - hasAttribute(Attributes::NoCapture); - } - - /// @brief Attributes that may be applied to the function itself. These cannot - /// be used on return values or function parameters. 
- bool hasFunctionOnlyAttrs() const { - return hasAttribute(Attributes::NoReturn) || - hasAttribute(Attributes::NoUnwind) || - hasAttribute(Attributes::ReadNone) || - hasAttribute(Attributes::ReadOnly) || - hasAttribute(Attributes::NoInline) || - hasAttribute(Attributes::AlwaysInline) || - hasAttribute(Attributes::OptimizeForSize) || - hasAttribute(Attributes::StackProtect) || - hasAttribute(Attributes::StackProtectReq) || - hasAttribute(Attributes::NoRedZone) || - hasAttribute(Attributes::NoImplicitFloat) || - hasAttribute(Attributes::Naked) || - hasAttribute(Attributes::InlineHint) || - hasAttribute(Attributes::StackAlignment) || - hasAttribute(Attributes::UWTable) || - hasAttribute(Attributes::NonLazyBind) || - hasAttribute(Attributes::ReturnsTwice) || - hasAttribute(Attributes::AddressSafety) || - hasAttribute(Attributes::MinSize); - } - - bool operator==(const Attributes &A) const { - return Attrs == A.Attrs; - } - bool operator!=(const Attributes &A) const { - return Attrs != A.Attrs; - } - - uint64_t Raw() const; - - /// @brief Which attributes cannot be applied to a type. - static Attributes typeIncompatible(Type *Ty); - - /// encodeLLVMAttributesForBitcode - This returns an integer containing an - /// encoding of all the LLVM attributes found in the given attribute bitset. - /// Any change to this encoding is a breaking change to bitcode compatibility. - static uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs); - - /// decodeLLVMAttributesForBitcode - This returns an attribute bitset - /// containing the LLVM attributes that have been decoded from the given - /// integer. This function must stay in sync with - /// 'encodeLLVMAttributesForBitcode'. - static Attributes decodeLLVMAttributesForBitcode(LLVMContext &C, - uint64_t EncodedAttrs); - - /// getAsString - The set of Attributes set in Attributes is converted to a - /// string of equivalent mnemonics. This is, presumably, for writing out the - /// mnemonics for the assembly writer. 
- /// @brief Convert attribute bits to text - std::string getAsString() const; -}; - -//===----------------------------------------------------------------------===// -/// AttrBuilder - This class is used in conjunction with the Attributes::get -/// method to create an Attributes object. The object itself is uniquified. The -/// Builder's value, however, is not. So this can be used as a quick way to test -/// for equality, presence of attributes, etc. -class AttrBuilder { - uint64_t Bits; -public: - AttrBuilder() : Bits(0) {} - explicit AttrBuilder(uint64_t B) : Bits(B) {} - AttrBuilder(const Attributes &A) : Bits(A.Raw()) {} - AttrBuilder(const AttrBuilder &B) : Bits(B.Bits) {} - - void clear() { Bits = 0; } - - /// addAttribute - Add an attribute to the builder. - AttrBuilder &addAttribute(Attributes::AttrVal Val); - - /// removeAttribute - Remove an attribute from the builder. - AttrBuilder &removeAttribute(Attributes::AttrVal Val); - - /// addAttribute - Add the attributes from A to the builder. - AttrBuilder &addAttributes(const Attributes &A); - - /// removeAttribute - Remove the attributes from A from the builder. - AttrBuilder &removeAttributes(const Attributes &A); - - /// hasAttribute - Return true if the builder has the specified attribute. - bool hasAttribute(Attributes::AttrVal A) const; - - /// hasAttributes - Return true if the builder has IR-level attributes. - bool hasAttributes() const; - - /// hasAttributes - Return true if the builder has any attribute that's in the - /// specified attribute. - bool hasAttributes(const Attributes &A) const; - - /// hasAlignmentAttr - Return true if the builder has an alignment attribute. - bool hasAlignmentAttr() const; - - /// getAlignment - Retrieve the alignment attribute, if it exists. - uint64_t getAlignment() const; - - /// getStackAlignment - Retrieve the stack alignment attribute, if it exists. 
- uint64_t getStackAlignment() const; - - /// addAlignmentAttr - This turns an int alignment (which must be a power of - /// 2) into the form used internally in Attributes. - AttrBuilder &addAlignmentAttr(unsigned Align); - - /// addStackAlignmentAttr - This turns an int stack alignment (which must be a - /// power of 2) into the form used internally in Attributes. - AttrBuilder &addStackAlignmentAttr(unsigned Align); - - /// addRawValue - Add the raw value to the internal representation. - /// N.B. This should be used ONLY for decoding LLVM bitcode! - AttrBuilder &addRawValue(uint64_t Val); - - /// @brief Remove attributes that are used on functions only. - void removeFunctionOnlyAttrs() { - removeAttribute(Attributes::NoReturn) - .removeAttribute(Attributes::NoUnwind) - .removeAttribute(Attributes::ReadNone) - .removeAttribute(Attributes::ReadOnly) - .removeAttribute(Attributes::NoInline) - .removeAttribute(Attributes::AlwaysInline) - .removeAttribute(Attributes::OptimizeForSize) - .removeAttribute(Attributes::StackProtect) - .removeAttribute(Attributes::StackProtectReq) - .removeAttribute(Attributes::NoRedZone) - .removeAttribute(Attributes::NoImplicitFloat) - .removeAttribute(Attributes::Naked) - .removeAttribute(Attributes::InlineHint) - .removeAttribute(Attributes::StackAlignment) - .removeAttribute(Attributes::UWTable) - .removeAttribute(Attributes::NonLazyBind) - .removeAttribute(Attributes::ReturnsTwice) - .removeAttribute(Attributes::AddressSafety) - .removeAttribute(Attributes::MinSize); - } - - uint64_t Raw() const { return Bits; } - - bool operator==(const AttrBuilder &B) { - return Bits == B.Bits; - } - bool operator!=(const AttrBuilder &B) { - return Bits != B.Bits; - } -}; - -//===----------------------------------------------------------------------===// -// AttributeWithIndex -//===----------------------------------------------------------------------===// - -/// AttributeWithIndex - This is just a pair of values to associate a set of -/// 
attributes with an index. -struct AttributeWithIndex { - Attributes Attrs; ///< The attributes that are set, or'd together. - unsigned Index; ///< Index of the parameter for which the attributes apply. - ///< Index 0 is used for return value attributes. - ///< Index ~0U is used for function attributes. - - static AttributeWithIndex get(LLVMContext &C, unsigned Idx, - ArrayRef Attrs) { - return get(Idx, Attributes::get(C, Attrs)); - } - static AttributeWithIndex get(unsigned Idx, Attributes Attrs) { - AttributeWithIndex P; - P.Index = Idx; - P.Attrs = Attrs; - return P; - } -}; - -//===----------------------------------------------------------------------===// -// AttrListPtr Smart Pointer -//===----------------------------------------------------------------------===// - -class AttributeListImpl; - -/// AttrListPtr - This class manages the ref count for the opaque -/// AttributeListImpl object and provides accessors for it. -class AttrListPtr { -public: - enum AttrIndex { - ReturnIndex = 0U, - FunctionIndex = ~0U - }; -private: - /// @brief The attributes that we are managing. This can be null to represent - /// the empty attributes list. - AttributeListImpl *AttrList; - - /// @brief The attributes for the specified index are returned. Attributes - /// for the result are denoted with Idx = 0. - Attributes getAttributes(unsigned Idx) const; - - explicit AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {} -public: - AttrListPtr() : AttrList(0) {} - AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {} - const AttrListPtr &operator=(const AttrListPtr &RHS); - - //===--------------------------------------------------------------------===// - // Attribute List Construction and Mutation - //===--------------------------------------------------------------------===// - - /// get - Return a Attributes list with the specified parameters in it. 
- static AttrListPtr get(LLVMContext &C, ArrayRef Attrs); - - /// addAttr - Add the specified attribute at the specified index to this - /// attribute list. Since attribute lists are immutable, this - /// returns the new list. - AttrListPtr addAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const; - - /// removeAttr - Remove the specified attribute at the specified index from - /// this attribute list. Since attribute lists are immutable, this - /// returns the new list. - AttrListPtr removeAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const; - - //===--------------------------------------------------------------------===// - // Attribute List Accessors - //===--------------------------------------------------------------------===// - /// getParamAttributes - The attributes for the specified index are - /// returned. - Attributes getParamAttributes(unsigned Idx) const { - return getAttributes(Idx); - } - - /// getRetAttributes - The attributes for the ret value are - /// returned. - Attributes getRetAttributes() const { - return getAttributes(ReturnIndex); - } - - /// getFnAttributes - The function attributes are returned. - Attributes getFnAttributes() const { - return getAttributes(FunctionIndex); - } - - /// paramHasAttr - Return true if the specified parameter index has the - /// specified attribute set. - bool paramHasAttr(unsigned Idx, Attributes Attr) const { - return getAttributes(Idx).hasAttributes(Attr); - } - - /// getParamAlignment - Return the alignment for the specified function - /// parameter. - unsigned getParamAlignment(unsigned Idx) const { - return getAttributes(Idx).getAlignment(); - } - - /// hasAttrSomewhere - Return true if the specified attribute is set for at - /// least one parameter or for the return value. - bool hasAttrSomewhere(Attributes::AttrVal Attr) const; - - unsigned getNumAttrs() const; - Attributes &getAttributesAtIndex(unsigned i) const; - - /// operator==/!= - Provide equality predicates. 
- bool operator==(const AttrListPtr &RHS) const - { return AttrList == RHS.AttrList; } - bool operator!=(const AttrListPtr &RHS) const - { return AttrList != RHS.AttrList; } - - //===--------------------------------------------------------------------===// - // Attribute List Introspection - //===--------------------------------------------------------------------===// - - /// getRawPointer - Return a raw pointer that uniquely identifies this - /// attribute list. - void *getRawPointer() const { - return AttrList; - } - - // Attributes are stored as a dense set of slots, where there is one - // slot for each argument that has an attribute. This allows walking over the - // dense set instead of walking the sparse list of attributes. - - /// isEmpty - Return true if there are no attributes. - /// - bool isEmpty() const { - return AttrList == 0; - } - - /// getNumSlots - Return the number of slots used in this attribute list. - /// This is the number of arguments that have an attribute set on them - /// (including the function itself). - unsigned getNumSlots() const; - - /// getSlot - Return the AttributeWithIndex at the specified slot. This - /// holds a index number plus a set of attributes. 
- const AttributeWithIndex &getSlot(unsigned Slot) const; - - void dump() const; -}; - -} // End llvm namespace - -#endif diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h index 4fd4b5d90a9e..7b30c7e458fa 100644 --- a/include/llvm/Bitcode/Archive.h +++ b/include/llvm/Bitcode/Archive.h @@ -50,10 +50,10 @@ class ArchiveMember : public ilist_node { SVR4SymbolTableFlag = 1, ///< Member is a SVR4 symbol table BSD4SymbolTableFlag = 2, ///< Member is a BSD4 symbol table LLVMSymbolTableFlag = 4, ///< Member is an LLVM symbol table - BitcodeFlag = 8, ///< Member is bitcode - HasPathFlag = 16, ///< Member has a full or partial path + BitcodeFlag = 8, ///< Member is bitcode + HasPathFlag = 16, ///< Member has a full or partial path HasLongFilenameFlag = 32, ///< Member uses the long filename syntax - StringTableFlag = 64 ///< Member is an ar(1) format string table + StringTableFlag = 64 ///< Member is an ar(1) format string table }; /// @} diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h index 28e1ab1c8711..b510daf33147 100644 --- a/include/llvm/Bitcode/BitCodes.h +++ b/include/llvm/Bitcode/BitCodes.h @@ -26,8 +26,8 @@ namespace llvm { namespace bitc { enum StandardWidths { - BlockIDWidth = 8, // We use VBR-8 for block IDs. - CodeLenWidth = 4, // Codelen are VBR-4. + BlockIDWidth = 8, // We use VBR-8 for block IDs. + CodeLenWidth = 4, // Codelen are VBR-4. BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block. }; @@ -69,10 +69,11 @@ namespace bitc { enum BlockInfoCodes { // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd // block, instead of the BlockInfo block. 
- - BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#] - BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name] - BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: [id, name] + + BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#] + BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name] + BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: + // [id, name] }; } // End bitc namespace @@ -99,7 +100,7 @@ public: explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0) : Val(Data), IsLiteral(false), Enc(E) {} - bool isLiteral() const { return IsLiteral; } + bool isLiteral() const { return IsLiteral; } bool isEncoding() const { return !IsLiteral; } // Accessors for literals. @@ -138,18 +139,18 @@ public: if (C >= 'a' && C <= 'z') return C-'a'; if (C >= 'A' && C <= 'Z') return C-'A'+26; if (C >= '0' && C <= '9') return C-'0'+26+26; - if (C == '.') return 62; - if (C == '_') return 63; + if (C == '.') return 62; + if (C == '_') return 63; llvm_unreachable("Not a value Char6 character!"); } static char DecodeChar6(unsigned V) { assert((V & ~63) == 0 && "Not a Char6 encoded character!"); - if (V < 26) return V+'a'; - if (V < 26+26) return V-26+'A'; + if (V < 26) return V+'a'; + if (V < 26+26) return V-26+'A'; if (V < 26+26+10) return V-26-26+'0'; - if (V == 62) return '.'; - if (V == 63) return '_'; + if (V == 62) return '.'; + if (V == 63) return '_'; llvm_unreachable("Not a value Char6 character!"); } diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h index 840f57e7526d..f3139739cd18 100644 --- a/include/llvm/Bitcode/BitstreamReader.h +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef BITSTREAM_READER_H -#define BITSTREAM_READER_H +#ifndef LLVM_BITCODE_BITSTREAMREADER_H +#define LLVM_BITCODE_BITSTREAMREADER_H #include "llvm/ADT/OwningPtr.h" #include "llvm/Bitcode/BitCodes.h" @@ -27,6 +27,11 @@ namespace llvm { 
class Deserializer; +/// BitstreamReader - This class is used to read from an LLVM bitcode stream, +/// maintaining information that is global to decoding the entire file. While +/// a file is being read, multiple cursors can be independently advanced or +/// skipped around within the file. These are represented by the +/// BitstreamCursor class. class BitstreamReader { public: /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. @@ -35,12 +40,12 @@ public: unsigned BlockID; std::vector Abbrevs; std::string Name; - + std::vector > RecordNames; }; private: OwningPtr BitcodeBytes; - + std::vector BlockInfoRecords; /// IgnoreBlockInfoNames - This is set to true if we don't care about the @@ -86,7 +91,7 @@ public: /// name information. void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } - + //===--------------------------------------------------------------------===// // Block Manipulation //===--------------------------------------------------------------------===// @@ -95,7 +100,7 @@ public: /// block info block for this Bitstream. We only process it for the first /// cursor that walks over it. bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } - + /// getBlockInfo - If there is block info for the specified ID, return it, /// otherwise return null. const BlockInfo *getBlockInfo(unsigned BlockID) const { @@ -119,113 +124,114 @@ public: BlockInfoRecords.back().BlockID = BlockID; return BlockInfoRecords.back(); } - }; + +/// BitstreamEntry - When advancing through a bitstream cursor, each advance can +/// discover a few different kinds of entries: +/// Error - Malformed bitcode was found. +/// EndBlock - We've reached the end of the current block, (or the end of the +/// file, which is treated like a series of EndBlock records. +/// SubBlock - This is the start of a new subblock of a specific ID. +/// Record - This is a record with a specific AbbrevID. 
+/// +struct BitstreamEntry { + enum { + Error, + EndBlock, + SubBlock, + Record + } Kind; + + unsigned ID; + + static BitstreamEntry getError() { + BitstreamEntry E; E.Kind = Error; return E; + } + static BitstreamEntry getEndBlock() { + BitstreamEntry E; E.Kind = EndBlock; return E; + } + static BitstreamEntry getSubBlock(unsigned ID) { + BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; + } + static BitstreamEntry getRecord(unsigned AbbrevID) { + BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; + } +}; + +/// BitstreamCursor - This represents a position within a bitcode file. There +/// may be multiple independent cursors reading within one bitstream, each +/// maintaining their own local state. +/// +/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not +/// be passed by value. class BitstreamCursor { friend class Deserializer; BitstreamReader *BitStream; size_t NextChar; - - /// CurWord - This is the current data we have pulled from the stream but have - /// not returned to the client. - uint32_t CurWord; - + + + /// CurWord/word_t - This is the current data we have pulled from the stream + /// but have not returned to the client. This is specifically and + /// intentionally defined to follow the word size of the host machine for + /// efficiency. We use word_t in places that are aware of this to make it + /// perfectly explicit what is going on. + typedef uint32_t word_t; + word_t CurWord; + /// BitsInCurWord - This is the number of bits in CurWord that are valid. This - /// is always from [0...31] inclusive. + /// is always from [0...31/63] inclusive (depending on word size). unsigned BitsInCurWord; - + // CurCodeSize - This is the declared size of code values used for the current // block, in bits. unsigned CurCodeSize; - + /// CurAbbrevs - Abbrevs installed at in this block. 
std::vector CurAbbrevs; - + struct Block { unsigned PrevCodeSize; std::vector PrevAbbrevs; explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} }; - + /// BlockScope - This tracks the codesize of parent blocks. SmallVector BlockScope; - + + public: BitstreamCursor() : BitStream(0), NextChar(0) { } BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) { operator=(RHS); } - + explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) { NextChar = 0; CurWord = 0; BitsInCurWord = 0; CurCodeSize = 2; } - + void init(BitstreamReader &R) { freeState(); - + BitStream = &R; NextChar = 0; CurWord = 0; BitsInCurWord = 0; CurCodeSize = 2; } - + ~BitstreamCursor() { freeState(); } - - void operator=(const BitstreamCursor &RHS) { - freeState(); - - BitStream = RHS.BitStream; - NextChar = RHS.NextChar; - CurWord = RHS.CurWord; - BitsInCurWord = RHS.BitsInCurWord; - CurCodeSize = RHS.CurCodeSize; - - // Copy abbreviations, and bump ref counts. - CurAbbrevs = RHS.CurAbbrevs; - for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); - i != e; ++i) - CurAbbrevs[i]->addRef(); - - // Copy block scope and bump ref counts. - BlockScope = RHS.BlockScope; - for (unsigned S = 0, e = static_cast(BlockScope.size()); - S != e; ++S) { - std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; - for (unsigned i = 0, e = static_cast(Abbrevs.size()); - i != e; ++i) - Abbrevs[i]->addRef(); - } - } - - void freeState() { - // Free all the Abbrevs. - for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); - i != e; ++i) - CurAbbrevs[i]->dropRef(); - CurAbbrevs.clear(); - - // Free all the Abbrevs in the block scope. - for (unsigned S = 0, e = static_cast(BlockScope.size()); - S != e; ++S) { - std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; - for (unsigned i = 0, e = static_cast(Abbrevs.size()); - i != e; ++i) - Abbrevs[i]->dropRef(); - } - BlockScope.clear(); - } - - /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #. 
- unsigned GetAbbrevIDWidth() const { return CurCodeSize; } - + + void operator=(const BitstreamCursor &RHS); + + void freeState(); + bool isEndPos(size_t pos) { return BitStream->getBitcodeBytes().isObjectEnd(static_cast(pos)); } @@ -236,61 +242,113 @@ public: static_cast(pos - 1)); } - unsigned char getByte(size_t pos) { - uint8_t byte = -1; - BitStream->getBitcodeBytes().readByte(pos, &byte); - return byte; - } - uint32_t getWord(size_t pos) { - uint8_t buf[sizeof(uint32_t)]; - memset(buf, 0xFF, sizeof(buf)); - BitStream->getBitcodeBytes().readBytes(pos, - sizeof(buf), - buf, - NULL); + uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; + BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL); return *reinterpret_cast(buf); } bool AtEndOfStream() { - return isEndPos(NextChar) && BitsInCurWord == 0; + return BitsInCurWord == 0 && isEndPos(NextChar); } - + + /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #. + unsigned getAbbrevIDWidth() const { return CurCodeSize; } + /// GetCurrentBitNo - Return the bit # of the bit we are reading. uint64_t GetCurrentBitNo() const { return NextChar*CHAR_BIT - BitsInCurWord; } - + BitstreamReader *getBitStreamReader() { return BitStream; } const BitstreamReader *getBitStreamReader() const { return BitStream; } - - + + /// Flags that modify the behavior of advance(). + enum { + /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does + /// not automatically pop the block scope when the end of a block is + /// reached. + AF_DontPopBlockAtEnd = 1, + + /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are + /// returned just like normal records. + AF_DontAutoprocessAbbrevs = 2 + }; + + /// advance - Advance the current bitstream, returning the next entry in the + /// stream. + BitstreamEntry advance(unsigned Flags = 0) { + while (1) { + unsigned Code = ReadCode(); + if (Code == bitc::END_BLOCK) { + // Pop the end of the block unless Flags tells us not to. 
+ if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) + return BitstreamEntry::getError(); + return BitstreamEntry::getEndBlock(); + } + + if (Code == bitc::ENTER_SUBBLOCK) + return BitstreamEntry::getSubBlock(ReadSubBlockID()); + + if (Code == bitc::DEFINE_ABBREV && + !(Flags & AF_DontAutoprocessAbbrevs)) { + // We read and accumulate abbrev's, the client can't do anything with + // them anyway. + ReadAbbrevRecord(); + continue; + } + + return BitstreamEntry::getRecord(Code); + } + } + + /// advanceSkippingSubblocks - This is a convenience function for clients that + /// don't expect any subblocks. This just skips over them automatically. + BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { + while (1) { + // If we found a normal entry, return it. + BitstreamEntry Entry = advance(Flags); + if (Entry.Kind != BitstreamEntry::SubBlock) + return Entry; + + // If we found a sub-block, just skip over it and check the next entry. + if (SkipBlock()) + return BitstreamEntry::getError(); + } + } + /// JumpToBit - Reset the stream to the specified bit number. void JumpToBit(uint64_t BitNo) { - uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3; - uintptr_t WordBitNo = uintptr_t(BitNo) & 31; + uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); + unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); assert(canSkipToPos(ByteNo) && "Invalid location"); - + // Move the cursor to the right word. NextChar = ByteNo; BitsInCurWord = 0; CurWord = 0; - + // Skip over any bits that are already consumed. - if (WordBitNo) - Read(static_cast(WordBitNo)); + if (WordBitNo) { + if (sizeof(word_t) > 4) + Read64(WordBitNo); + else + Read(WordBitNo); + } } - - + + uint32_t Read(unsigned NumBits) { - assert(NumBits <= 32 && "Cannot return more than 32 bits!"); + assert(NumBits && NumBits <= 32 && + "Cannot return zero or more than 32 bits!"); + // If the field is fully contained by CurWord, return it quickly. 
if (BitsInCurWord >= NumBits) { - uint32_t R = CurWord & ((1U << NumBits)-1); + uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); CurWord >>= NumBits; BitsInCurWord -= NumBits; return R; @@ -303,24 +361,37 @@ public: return 0; } - unsigned R = CurWord; + uint32_t R = uint32_t(CurWord); // Read the next word from the stream. - CurWord = getWord(NextChar); - NextChar += 4; + uint8_t Array[sizeof(word_t)] = {0}; + + BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), + Array, NULL); + + // Handle big-endian byte-swapping if necessary. + support::detail::packed_endian_specific_integral + EndianValue; + memcpy(&EndianValue, Array, sizeof(Array)); + + CurWord = EndianValue; + + NextChar += sizeof(word_t); // Extract NumBits-BitsInCurWord from what we just read. unsigned BitsLeft = NumBits-BitsInCurWord; - // Be careful here, BitsLeft is in the range [1..32] inclusive. - R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord; + // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive. + R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft))) + << BitsInCurWord); - // BitsLeft bits have just been used up from CurWord. - if (BitsLeft != 32) + // BitsLeft bits have just been used up from CurWord. BitsLeft is in the + // range [1..32]/[1..64] so be careful how we shift. + if (BitsLeft != sizeof(word_t)*8) CurWord >>= BitsLeft; else CurWord = 0; - BitsInCurWord = 32-BitsLeft; + BitsInCurWord = sizeof(word_t)*8-BitsLeft; return R; } @@ -369,10 +440,21 @@ public: } } - void SkipToWord() { +private: + void SkipToFourByteBoundary() { + // If word_t is 64-bits and if we've read less than 32 bits, just dump + // the bits we have up to the next 32-bit boundary. 
+ if (sizeof(word_t) > 4 && + BitsInCurWord >= 32) { + CurWord >>= BitsInCurWord-32; + BitsInCurWord = 32; + return; + } + BitsInCurWord = 0; CurWord = 0; } +public: unsigned ReadCode() { return Read(CurCodeSize); @@ -395,62 +477,37 @@ public: // Read and ignore the codelen value. Since we are skipping this block, we // don't care what code widths are used inside of it. ReadVBR(bitc::CodeLenWidth); - SkipToWord(); - unsigned NumWords = Read(bitc::BlockSizeWidth); + SkipToFourByteBoundary(); + unsigned NumFourBytes = Read(bitc::BlockSizeWidth); // Check that the block wasn't partially defined, and that the offset isn't // bogus. - size_t SkipTo = NextChar + NumWords*4; - if (AtEndOfStream() || !canSkipToPos(SkipTo)) + size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; + if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) return true; - NextChar = SkipTo; + JumpToBit(SkipTo); return false; } /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter /// the block, and return true if the block has an error. - bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) { - // Save the current block's state on BlockScope. - BlockScope.push_back(Block(CurCodeSize)); - BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); - - // Add the abbrevs specific to this block to the CurAbbrevs list. - if (const BitstreamReader::BlockInfo *Info = - BitStream->getBlockInfo(BlockID)) { - for (unsigned i = 0, e = static_cast(Info->Abbrevs.size()); - i != e; ++i) { - CurAbbrevs.push_back(Info->Abbrevs[i]); - CurAbbrevs.back()->addRef(); - } - } - - // Get the codesize of this block. - CurCodeSize = ReadVBR(bitc::CodeLenWidth); - SkipToWord(); - unsigned NumWords = Read(bitc::BlockSizeWidth); - if (NumWordsP) *NumWordsP = NumWords; - - // Validate that this block is sane. 
- if (CurCodeSize == 0 || AtEndOfStream()) - return true; - - return false; - } + bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0); bool ReadBlockEnd() { if (BlockScope.empty()) return true; // Block tail: // [END_BLOCK, ] - SkipToWord(); + SkipToFourByteBoundary(); - PopBlockScope(); + popBlockScope(); return false; } private: - void PopBlockScope() { + + void popBlockScope() { CurCodeSize = BlockScope.back().PrevCodeSize; // Delete abbrevs from popped scope. @@ -462,207 +519,40 @@ private: BlockScope.pop_back(); } - //===--------------------------------------------------------------------===// + //===--------------------------------------------------------------------===// // Record Processing //===--------------------------------------------------------------------===// private: - void ReadAbbreviatedLiteral(const BitCodeAbbrevOp &Op, - SmallVectorImpl &Vals) { - assert(Op.isLiteral() && "Not a literal"); - // If the abbrev specifies the literal value to use, use it. - Vals.push_back(Op.getLiteralValue()); - } - - void ReadAbbreviatedField(const BitCodeAbbrevOp &Op, - SmallVectorImpl &Vals) { - assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op, + SmallVectorImpl &Vals); + void readAbbreviatedField(const BitCodeAbbrevOp &Op, + SmallVectorImpl &Vals); + void skipAbbreviatedField(const BitCodeAbbrevOp &Op); - // Decode the value as we are commanded. - switch (Op.getEncoding()) { - default: llvm_unreachable("Unknown encoding!"); - case BitCodeAbbrevOp::Fixed: - Vals.push_back(Read((unsigned)Op.getEncodingData())); - break; - case BitCodeAbbrevOp::VBR: - Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); - break; - case BitCodeAbbrevOp::Char6: - Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); - break; - } - } public: - /// getAbbrev - Return the abbreviation for the specified AbbrevId. + /// getAbbrev - Return the abbreviation for the specified AbbrevId. 
const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); return CurAbbrevs[AbbrevNo]; } - - unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl &Vals, - const char **BlobStart = 0, unsigned *BlobLen = 0) { - if (AbbrevID == bitc::UNABBREV_RECORD) { - unsigned Code = ReadVBR(6); - unsigned NumElts = ReadVBR(6); - for (unsigned i = 0; i != NumElts; ++i) - Vals.push_back(ReadVBR64(6)); - return Code; - } - const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + /// skipRecord - Read the current record and discard it. + void skipRecord(unsigned AbbrevID); - for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { - const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); - if (Op.isLiteral()) { - ReadAbbreviatedLiteral(Op, Vals); - } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { - // Array case. Read the number of elements as a vbr6. - unsigned NumElts = ReadVBR(6); + unsigned readRecord(unsigned AbbrevID, SmallVectorImpl &Vals, + StringRef *Blob = 0); - // Get the element encoding. - assert(i+2 == e && "array op not second to last?"); - const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); - - // Read all the elements. - for (; NumElts; --NumElts) - ReadAbbreviatedField(EltEnc, Vals); - } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { - // Blob case. Read the number of bytes as a vbr6. - unsigned NumElts = ReadVBR(6); - SkipToWord(); // 32-bit alignment - - // Figure out where the end of this blob will be including tail padding. - size_t NewEnd = NextChar+((NumElts+3)&~3); - - // If this would read off the end of the bitcode file, just set the - // record to empty and return. - if (!canSkipToPos(NewEnd)) { - Vals.append(NumElts, 0); - NextChar = BitStream->getBitcodeBytes().getExtent(); - break; - } - - // Otherwise, read the number of bytes. If we can return a reference to - // the data, do so to avoid copying it. 
- if (BlobStart) { - *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer( - NextChar, NumElts); - *BlobLen = NumElts; - } else { - for (; NumElts; ++NextChar, --NumElts) - Vals.push_back(getByte(NextChar)); - } - // Skip over tail padding. - NextChar = NewEnd; - } else { - ReadAbbreviatedField(Op, Vals); - } - } - - unsigned Code = (unsigned)Vals[0]; - Vals.erase(Vals.begin()); - return Code; - } - - unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl &Vals, - const char *&BlobStart, unsigned &BlobLen) { - return ReadRecord(AbbrevID, Vals, &BlobStart, &BlobLen); - } - - //===--------------------------------------------------------------------===// // Abbrev Processing //===--------------------------------------------------------------------===// + void ReadAbbrevRecord(); - void ReadAbbrevRecord() { - BitCodeAbbrev *Abbv = new BitCodeAbbrev(); - unsigned NumOpInfo = ReadVBR(5); - for (unsigned i = 0; i != NumOpInfo; ++i) { - bool IsLiteral = Read(1) ? true : false; - if (IsLiteral) { - Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); - continue; - } - - BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); - if (BitCodeAbbrevOp::hasEncodingData(E)) - Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); - else - Abbv->Add(BitCodeAbbrevOp(E)); - } - CurAbbrevs.push_back(Abbv); - } - -public: - - bool ReadBlockInfoBlock() { - // If this is the second stream to get to the block info block, skip it. - if (BitStream->hasBlockInfoRecords()) - return SkipBlock(); - - if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; - - SmallVector Record; - BitstreamReader::BlockInfo *CurBlockInfo = 0; - - // Read all the records for this module. - while (1) { - unsigned Code = ReadCode(); - if (Code == bitc::END_BLOCK) - return ReadBlockEnd(); - if (Code == bitc::ENTER_SUBBLOCK) { - ReadSubBlockID(); - if (SkipBlock()) return true; - continue; - } - - // Read abbrev records, associate them with CurBID. 
- if (Code == bitc::DEFINE_ABBREV) { - if (!CurBlockInfo) return true; - ReadAbbrevRecord(); - - // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the - // appropriate BlockInfo. - BitCodeAbbrev *Abbv = CurAbbrevs.back(); - CurAbbrevs.pop_back(); - CurBlockInfo->Abbrevs.push_back(Abbv); - continue; - } - - // Read a record. - Record.clear(); - switch (ReadRecord(Code, Record)) { - default: break; // Default behavior, ignore unknown content. - case bitc::BLOCKINFO_CODE_SETBID: - if (Record.size() < 1) return true; - CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]); - break; - case bitc::BLOCKINFO_CODE_BLOCKNAME: { - if (!CurBlockInfo) return true; - if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. - std::string Name; - for (unsigned i = 0, e = Record.size(); i != e; ++i) - Name += (char)Record[i]; - CurBlockInfo->Name = Name; - break; - } - case bitc::BLOCKINFO_CODE_SETRECORDNAME: { - if (!CurBlockInfo) return true; - if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. 
- std::string Name; - for (unsigned i = 1, e = Record.size(); i != e; ++i) - Name += (char)Record[i]; - CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0], - Name)); - break; - } - } - } - } + bool ReadBlockInfoBlock(); }; - + } // End llvm namespace #endif diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index dea118f98ed2..a837211875f5 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#ifndef BITSTREAM_WRITER_H -#define BITSTREAM_WRITER_H +#ifndef LLVM_BITCODE_BITSTREAMWRITER_H +#define LLVM_BITCODE_BITSTREAMWRITER_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitCodes.h" #include @@ -273,7 +273,7 @@ public: private: /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev - /// record. This is a no-op, since the abbrev specifies the literal to use. + /// record. This is a no-op, since the abbrev specifies the literal to use. template void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) { assert(Op.isLiteral() && "Not a literal"); @@ -282,13 +282,13 @@ private: assert(V == Op.getLiteralValue() && "Invalid abbrev for record!"); } - + /// EmitAbbreviatedField - Emit a single scalar field value with the specified /// encoding. template void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); - + // Encode the value as we are commanded. switch (Op.getEncoding()) { default: llvm_unreachable("Unknown encoding!"); @@ -305,7 +305,7 @@ private: break; } } - + /// EmitRecordWithAbbrevImpl - This is the core implementation of the record /// emission code. 
If BlobData is non-null, then it specifies an array of /// data that should be emitted as part of the Blob or Array operand that is @@ -341,11 +341,11 @@ private: "Blob data and record entries specified for array!"); // Emit a vbr6 to indicate the number of elements present. EmitVBR(static_cast(BlobLen), 6); - + // Emit each field. for (unsigned i = 0; i != BlobLen; ++i) EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); - + // Know that blob data is consumed for assertion below. BlobData = 0; } else { @@ -359,7 +359,7 @@ private: } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { // If this record has blob data, emit it, otherwise we must have record // entries to encode this way. - + // Emit a vbr6 to indicate the number of elements present. if (BlobData) { EmitVBR(static_cast(BlobLen), 6); @@ -368,7 +368,7 @@ private: } else { EmitVBR(static_cast(Vals.size()-RecordIdx), 6); } - + // Flush to a 32-bit alignment boundary. FlushToWord(); @@ -376,7 +376,7 @@ private: if (BlobData) { for (unsigned i = 0; i != BlobLen; ++i) WriteByte((unsigned char)BlobData[i]); - + // Know that blob data is consumed for assertion below. BlobData = 0; } else { @@ -399,7 +399,7 @@ private: assert(BlobData == 0 && "Blob data specified for record that doesn't use it!"); } - + public: /// EmitRecord - Emit the specified record to the stream, using an abbrev if @@ -420,10 +420,10 @@ public: // Insert the code into Vals to treat it uniformly. Vals.insert(Vals.begin(), Code); - + EmitRecordWithAbbrev(Abbrev, Vals); } - + /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. /// Unlike EmitRecord, the code for the record should be included in Vals as /// the first entry. @@ -431,7 +431,7 @@ public: void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl &Vals) { EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); } - + /// EmitRecordWithBlob - Emit the specified record to the stream, using an /// abbrev that includes a blob at the end. 
The blob data to emit is /// specified by the pointer and length specified at the end. In contrast to @@ -458,10 +458,10 @@ public: template void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl &Vals, const char *ArrayData, unsigned ArrayLen) { - return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, + return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, ArrayLen)); } - + //===--------------------------------------------------------------------===// // Abbrev Emission //===--------------------------------------------------------------------===// diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index c1dc190304c2..f9690d5b779c 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -29,18 +29,17 @@ namespace bitc { // Module sub-block id's. PARAMATTR_BLOCK_ID, + PARAMATTR_GROUP_BLOCK_ID, - UNUSED_ID1, - CONSTANTS_BLOCK_ID, FUNCTION_BLOCK_ID, - - UNUSED_ID2, - + + UNUSED_ID1, + VALUE_SYMTAB_BLOCK_ID, METADATA_BLOCK_ID, METADATA_ATTACHMENT_ID, - + TYPE_BLOCK_ID_NEW, USELIST_BLOCK_ID @@ -54,6 +53,8 @@ namespace bitc { MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N] MODULE_CODE_ASM = 4, // ASM: [strchr x N] MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] + + // FIXME: Remove DEPLIB in 4.0. MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] // GLOBALVAR: [pointer type, isconst, initid, @@ -67,7 +68,7 @@ namespace bitc { // ALIAS: [alias type, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS = 9, - /// MODULE_CODE_PURGEVALS: [numvals] + // MODULE_CODE_PURGEVALS: [numvals] MODULE_CODE_PURGEVALS = 10, MODULE_CODE_GCNAME = 11 // GCNAME: [strchr x N] @@ -75,7 +76,12 @@ namespace bitc { /// PARAMATTR blocks have code for defining a parameter attribute set. enum AttributeCodes { - PARAMATTR_CODE_ENTRY = 1 // ENTRY: [paramidx0, attr0, paramidx1, attr1...] 
+ // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0 + PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0, + // paramidx1, attr1...] + PARAMATTR_CODE_ENTRY = 2, // ENTRY: [paramidx0, attrgrp0, + // paramidx1, attrgrp1, ...] + PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [id, attr0, att1, ...] }; /// TYPE blocks have codes for each type primitive they use. @@ -93,9 +99,9 @@ namespace bitc { TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty, // paramty x N] - + TYPE_CODE_HALF = 10, // HALF - + TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty] @@ -109,7 +115,7 @@ namespace bitc { TYPE_CODE_METADATA = 16, // METADATA TYPE_CODE_X86_MMX = 17, // X86 MMX - + TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N] TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N] TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N] @@ -141,6 +147,7 @@ namespace bitc { METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] METADATA_ATTACHMENT = 11 // [m x [value, [n x [id, mdnode]]] }; + // The constants block (CONSTANTS_BLOCK_ID) describes emission for each // constant and maintains an implicit current type value. enum ConstantsCodes { @@ -234,7 +241,7 @@ namespace bitc { OBO_NO_SIGNED_WRAP = 1 }; - /// PossiblyExactOperatorOptionalFlags - Flags for serializing + /// PossiblyExactOperatorOptionalFlags - Flags for serializing /// PossiblyExactOperator's SubclassOptionalData contents. 
enum PossiblyExactOperatorOptionalFlags { PEO_EXACT = 0 diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index dd96b043fc95..78f40ca17e61 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_BITCODE_H -#define LLVM_BITCODE_H +#ifndef LLVM_BITCODE_READERWRITER_H +#define LLVM_BITCODE_READERWRITER_H #include diff --git a/include/llvm/CMakeLists.txt b/include/llvm/CMakeLists.txt index f8cb4250584c..32ffca75bb7a 100644 --- a/include/llvm/CMakeLists.txt +++ b/include/llvm/CMakeLists.txt @@ -1,10 +1,4 @@ -set(LLVM_TARGET_DEFINITIONS Intrinsics.td) - -tablegen(LLVM Intrinsics.gen -gen-intrinsic) - -add_custom_target(intrinsics_gen ALL - DEPENDS ${llvm_builded_incs_dir}/Intrinsics.gen) -set_target_properties(intrinsics_gen PROPERTIES FOLDER "Tablegenning") +add_subdirectory(IR) if( MSVC_IDE OR XCODE ) # Creates a dummy target containing all headers for the benefit of diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h index 0b609ed6586e..ce9ca0a0583a 100644 --- a/include/llvm/CodeGen/Analysis.h +++ b/include/llvm/CodeGen/Analysis.h @@ -14,12 +14,12 @@ #ifndef LLVM_CODEGEN_ANALYSIS_H #define LLVM_CODEGEN_ANALYSIS_H -#include "llvm/Instructions.h" -#include "llvm/InlineAsm.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/CallSite.h" namespace llvm { @@ -86,11 +86,7 @@ ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred); /// between it and the return. /// /// This function only tests target-independent requirements. 
-bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, - const TargetLowering &TLI); - -bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - SDValue &Chain, const TargetLowering &TLI); +bool isInTailCallPosition(ImmutableCallSite CS, const TargetLowering &TLI); } // End llvm namespace diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index a92b85939f37..e0a6e3f4027a 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -17,7 +17,7 @@ #define LLVM_CODEGEN_ASMPRINTER_H #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/InlineAsm.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -385,10 +385,8 @@ namespace llvm { /// GetSizeOfEncodedValue - Return the size of the encoding in bytes. unsigned GetSizeOfEncodedValue(unsigned Encoding) const; - /// EmitReference - Emit a reference to a label with a specified encoding. - /// - void EmitReference(const MCSymbol *Sym, unsigned Encoding) const; - void EmitReference(const GlobalValue *GV, unsigned Encoding) const; + /// EmitReference - Emit reference to a ttype global with a specified encoding. + void EmitTTypeReference(const GlobalValue *GV, unsigned Encoding) const; /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of /// its section. 
This can be done with a special directive if the target diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index 2f76a6cc5583..9cd2decfacff 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -11,8 +11,8 @@ #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/SlotIndexes.h" namespace llvm { diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 436918b1eb33..c035e0777cce 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -16,11 +16,11 @@ #define LLVM_CODEGEN_CALLINGCONVLOWER_H #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetCallingConv.h" -#include "llvm/CallingConv.h" namespace llvm { class TargetRegisterInfo; @@ -50,10 +50,10 @@ private: unsigned Loc; /// isMem - True if this is a memory loc, false if it is a register loc. - bool isMem : 1; + unsigned isMem : 1; /// isCustom - True if this arg/retval requires special handling. - bool isCustom : 1; + unsigned isCustom : 1; /// Information about how the value is assigned. 
LocInfo HTP : 6; diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index 90ee23424498..9a27661b5190 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -1,4 +1,4 @@ -//===-- CommandFlags.h - Register Coalescing Interface ----------*- C++ -*-===// +//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -13,13 +13,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_COMMAND_LINE_FLAGS_H -#define LLVM_CODEGEN_COMMAND_LINE_FLAGS_H +#ifndef LLVM_CODEGEN_COMMANDFLAGS_H +#define LLVM_CODEGEN_COMMANDFLAGS_H -#include "llvm/Support/CommandLine.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" - #include using namespace llvm; diff --git a/include/llvm/CodeGen/DAGCombine.h b/include/llvm/CodeGen/DAGCombine.h new file mode 100644 index 000000000000..8b5919005451 --- /dev/null +++ b/include/llvm/CodeGen/DAGCombine.h @@ -0,0 +1,25 @@ +//===-- llvm/CodeGen/DAGCombine.h ------- SelectionDAG Nodes ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_CODEGEN_DAGCOMBINE_H +#define LLVM_CODEGEN_DAGCOMBINE_H + +namespace llvm { + +enum CombineLevel { + BeforeLegalizeTypes, + AfterLegalizeTypes, + AfterLegalizeVectorOps, + AfterLegalizeDAG +}; + +} // end llvm namespace + +#endif diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index 2d2db78144a4..9d25fd377b7e 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -26,8 +26,8 @@ #ifndef LLVM_CODEGEN_DFAPACKETIZER_H #define LLVM_CODEGEN_DFAPACKETIZER_H -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include namespace llvm { @@ -135,7 +135,7 @@ public: // initPacketizerState - perform initialization before packetizing // an instruction. This function is supposed to be overrided by // the target dependent packetizer. - virtual void initPacketizerState(void) { return; } + virtual void initPacketizerState() { return; } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. virtual bool ignorePseudoInstruction(MachineInstr *I, diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 7c24e36092b4..705db7e64340 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -15,8 +15,8 @@ #define LLVM_CODEGEN_FASTISEL_H #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/ValueTypes.h" namespace llvm { @@ -90,6 +90,11 @@ public: /// getCurDebugLoc() - Return current debug location information. DebugLoc getCurDebugLoc() const { return DL; } + + /// LowerArguments - Do "fast" instruction selection for function arguments + /// and append machine instructions to the current block. Return true if + /// it is successful. 
+ bool LowerArguments(); /// SelectInstruction - Do "fast" instruction selection for the given /// LLVM IR instruction, and append generated machine instructions to @@ -131,6 +136,10 @@ public: /// into the current block. void recomputeInsertPt(); + /// removeDeadCode - Remove all dead instructions between the I and E. + void removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E); + struct SavePoint { MachineBasicBlock::iterator InsertPt; DebugLoc DL; @@ -156,6 +165,11 @@ protected: /// virtual bool TargetSelectInstruction(const Instruction *I) = 0; + + /// FastLowerArguments - This method is called by target-independent code to + /// do target specific argument lowering. It returns true if it was + /// successful. + virtual bool FastLowerArguments(); /// FastEmit_r - This method is called by target-independent code /// to request that an instruction with the given type and opcode @@ -395,10 +409,6 @@ private: /// hasTrivialKill - Test whether the given value has exactly one use. bool hasTrivialKill(const Value *V) const; - - /// removeDeadCode - Remove all dead instructions between the I and E. 
- void removeDeadCode(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E); }; } diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 8cf22eca4fa6..ea6cb27b7b13 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -15,19 +15,15 @@ #ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H #define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Support/CallSite.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/Target/TargetRegisterInfo.h" #include @@ -35,6 +31,7 @@ namespace llvm { class AllocaInst; class BasicBlock; +class BranchProbabilityInfo; class CallInst; class Function; class GlobalVariable; @@ -136,7 +133,7 @@ public: return ValueMap.count(V); } - unsigned CreateReg(EVT VT); + unsigned CreateReg(MVT VT); unsigned CreateRegs(Type *Ty); diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index 076f6f39fe2c..1070d29f7381 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -33,9 +33,9 @@ #ifndef LLVM_CODEGEN_GCMETADATA_H #define LLVM_CODEGEN_GCMETADATA_H -#include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Pass.h" #include "llvm/Support/DebugLoc.h" namespace llvm { @@ -180,7 +180,8 @@ namespace llvm { GCModuleInfo(); ~GCModuleInfo(); - /// clear - Resets the pass. The metadata deleter pass calls this. + /// clear - Resets the pass. 
Any pass, which uses GCModuleInfo, should + /// call it in doFinalization(). /// void clear(); diff --git a/include/llvm/CodeGen/GCs.h b/include/llvm/CodeGen/GCs.h index c407b6167485..456d2dcb51a6 100644 --- a/include/llvm/CodeGen/GCs.h +++ b/include/llvm/CodeGen/GCs.h @@ -26,6 +26,12 @@ namespace llvm { /// Creates an ocaml-compatible metadata printer. void linkOcamlGCPrinter(); + + /// Creates an erlang-compatible garbage collector. + void linkErlangGC(); + + /// Creates an erlang-compatible metadata printer. + void linkErlangGCPrinter(); /// Creates a shadow stack garbage collector. This collector requires no code /// generator support. diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 5d0a3b4c7067..442729b5d775 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -311,8 +311,10 @@ namespace ISD { /// the shift amount can be any type, but care must be taken to ensure it is /// large enough. TLI.getShiftAmountTy() is i8 on some targets, but before /// legalization, types like i1024 can occur and i8 doesn't have enough bits - /// to represent the shift amount. By convention, DAGCombine and - /// SelectionDAGBuilder forces these shift amounts to i32 for simplicity. + /// to represent the shift amount. + /// When the 1st operand is a vector, the shift amount must be in the same + /// type. (TLI.getShiftAmountTy() will return the same type when the input + /// type is a vector.) SHL, SRA, SRL, ROTL, ROTR, /// Byte Swap and Counting operators. @@ -455,6 +457,9 @@ namespace ISD { FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR, + + /// FSINCOS - Compute both fsin and fcos as a single operation. 
+ FSINCOS, /// LOAD and STORE have token chains as their first operand, then the same /// operands as an LLVM load/store instruction, then an offset node that diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h index 5a3fb4b1a3df..68389dde494f 100644 --- a/include/llvm/CodeGen/IntrinsicLowering.h +++ b/include/llvm/CodeGen/IntrinsicLowering.h @@ -16,7 +16,7 @@ #ifndef LLVM_CODEGEN_INTRINSICLOWERING_H #define LLVM_CODEGEN_INTRINSICLOWERING_H -#include "llvm/Intrinsics.h" +#include "llvm/IR/Intrinsics.h" namespace llvm { class CallInst; diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h index 89f00e91f78e..9a7321418698 100644 --- a/include/llvm/CodeGen/JITCodeEmitter.h +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -17,11 +17,11 @@ #ifndef LLVM_CODEGEN_JITCODEEMITTER_H #define LLVM_CODEGEN_JITCODEEMITTER_H -#include +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/MathExtras.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/ADT/DenseMap.h" +#include namespace llvm { @@ -207,8 +207,7 @@ public: /// emitString - This callback is invoked when a String needs to be /// written to the output stream. 
void emitString(const std::string &String) { - for (unsigned i = 0, N = static_cast(String.size()); - i < N; ++i) { + for (size_t i = 0, N = String.size(); i < N; ++i) { uint8_t C = String[i]; emitByte(C); } diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index 8fb31aa8a6d1..d454347d0b82 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LATENCY_PRIORITY_QUEUE_H -#define LATENCY_PRIORITY_QUEUE_H +#ifndef LLVM_CODEGEN_LATENCYPRIORITYQUEUE_H +#define LLVM_CODEGEN_LATENCYPRIORITYQUEUE_H #include "llvm/CodeGen/ScheduleDAG.h" diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h index 8414c64544e5..ff65db4ee4ce 100644 --- a/include/llvm/CodeGen/LexicalScopes.h +++ b/include/llvm/CodeGen/LexicalScopes.h @@ -17,11 +17,11 @@ #ifndef LLVM_CODEGEN_LEXICALSCOPES_H #define LLVM_CODEGEN_LEXICALSCOPES_H -#include "llvm/Metadata.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/DebugLoc.h" #include "llvm/Support/ValueHandle.h" #include @@ -159,9 +159,6 @@ public: LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A), LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0) { -#ifndef NDEBUG - IndentLevel = 0; -#endif if (Parent) Parent->addChild(this); } @@ -228,7 +225,7 @@ public: void setDFSIn(unsigned I) { DFSIn = I; } /// dump - print lexical scope. - void dump() const; + void dump(unsigned Indent = 0) const; private: LexicalScope *Parent; // Parent to this scope. @@ -244,9 +241,6 @@ private: const MachineInstr *FirstInsn; // First instruction of this scope. 
unsigned DFSIn, DFSOut; // In & Out Depth use to determine // scope nesting. -#ifndef NDEBUG - mutable unsigned IndentLevel; // Private state for dump() -#endif }; } // end llvm namespace diff --git a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h index 7d1b1fe477a5..c3046da90b8d 100644 --- a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h +++ b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h @@ -29,6 +29,7 @@ namespace { return; llvm::linkOcamlGCPrinter(); + llvm::linkErlangGCPrinter(); } } ForceAsmWriterLinking; // Force link by creating a global definition. diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h index 46dd004609f5..916c0f233ef8 100644 --- a/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H #define LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H +#include "llvm/CodeGen/GCs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/GCs.h" #include "llvm/Target/TargetMachine.h" #include @@ -37,6 +37,7 @@ namespace { (void) llvm::createDefaultPBQPRegisterAllocator(); llvm::linkOcamlGC(); + llvm::linkErlangGC(); llvm::linkShadowStackGC(); (void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default); diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 185e414ae2cd..244be9c50155 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -22,9 +22,9 @@ #define LLVM_CODEGEN_LIVEINTERVAL_H #include "llvm/ADT/IntEqClasses.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/AlignOf.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" #include #include @@ -86,9 +86,10 @@ namespace llvm { SlotIndex end; // End point of the interval 
(exclusive) VNInfo *valno; // identifier for the value contained in this interval. + LiveRange() : valno(0) {} + LiveRange(SlotIndex S, SlotIndex E, VNInfo *V) : start(S), end(E), valno(V) { - assert(S < E && "Cannot create empty or backwards range"); } @@ -373,8 +374,8 @@ namespace llvm { /// addRange - Add the specified LiveRange to this interval, merging /// intervals as appropriate. This returns an iterator to the inserted live /// range (which may have grown since it was inserted. - void addRange(LiveRange LR) { - addRangeFrom(LR, ranges.begin()); + iterator addRange(LiveRange LR) { + return addRangeFrom(LR, ranges.begin()); } /// extendInBlock - If this interval is live before Kill in the basic block @@ -460,9 +461,6 @@ namespace llvm { void extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd); Ranges::iterator extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStr); void markValNoForDeletion(VNInfo *V); - void mergeIntervalRanges(const LiveInterval &RHS, - VNInfo *LHSValNo = 0, - const VNInfo *RHSValNo = 0); LiveInterval& operator=(const LiveInterval& rhs) LLVM_DELETED_FUNCTION; @@ -473,6 +471,64 @@ namespace llvm { return OS; } + /// Helper class for performant LiveInterval bulk updates. + /// + /// Calling LiveInterval::addRange() repeatedly can be expensive on large + /// live ranges because segments after the insertion point may need to be + /// shifted. The LiveRangeUpdater class can defer the shifting when adding + /// many segments in order. + /// + /// The LiveInterval will be in an invalid state until flush() is called. + class LiveRangeUpdater { + LiveInterval *LI; + SlotIndex LastStart; + LiveInterval::iterator WriteI; + LiveInterval::iterator ReadI; + SmallVector Spills; + void mergeSpills(); + + public: + /// Create a LiveRangeUpdater for adding segments to LI. + /// LI will temporarily be in an invalid state until flush() is called. 
+ LiveRangeUpdater(LiveInterval *li = 0) : LI(li) {} + + ~LiveRangeUpdater() { flush(); } + + /// Add a segment to LI and coalesce when possible, just like LI.addRange(). + /// Segments should be added in increasing start order for best performance. + void add(LiveRange); + + void add(SlotIndex Start, SlotIndex End, VNInfo *VNI) { + add(LiveRange(Start, End, VNI)); + } + + /// Return true if the LI is currently in an invalid state, and flush() + /// needs to be called. + bool isDirty() const { return LastStart.isValid(); } + + /// Flush the updater state to LI so it is valid and contains all added + /// segments. + void flush(); + + /// Select a different destination live range. + void setDest(LiveInterval *li) { + if (LI != li && isDirty()) + flush(); + LI = li; + } + + /// Get the current destination live range. + LiveInterval *getDest() const { return LI; } + + void dump() const; + void print(raw_ostream&) const; + }; + + inline raw_ostream &operator<<(raw_ostream &OS, const LiveRangeUpdater &X) { + X.print(OS); + return OS; + } + /// LiveRangeQuery - Query information about a live range around a given /// instruction. 
This class hides the implementation details of live ranges, /// and it should be used as the primary interface for examining live ranges diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index b421753dd536..7d72f37255b4 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -20,22 +20,21 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H #define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" +#include "llvm/Target/TargetRegisterInfo.h" #include #include namespace llvm { class AliasAnalysis; + class BitVector; class LiveRangeCalc; class LiveVariables; class MachineDominatorTree; @@ -53,7 +52,6 @@ namespace llvm { const TargetRegisterInfo* TRI; const TargetInstrInfo* TII; AliasAnalysis *AA; - LiveVariables* LV; SlotIndexes* Indexes; MachineDominatorTree *DomTree; LiveRangeCalc *LRCalc; @@ -215,6 +213,13 @@ namespace llvm { return Indexes->getMBBFromIndex(index); } + void insertMBBInMaps(MachineBasicBlock *MBB) { + Indexes->insertMBBInMaps(MBB); + assert(unsigned(MBB->getNumber()) == RegMaskBlocks.size() && + "Blocks must be added in order."); + RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0)); + } + SlotIndex InsertMachineInstrInMaps(MachineInstr *MI) { return Indexes->insertMachineInstrInMaps(MI); } @@ -275,6 +280,21 @@ namespace llvm { void handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart, bool UpdateFlags = false); + /// repairIntervalsInRange - Update 
live intervals for instructions in a + /// range of iterators. It is intended for use after target hooks that may + /// insert or remove instructions, and is only efficient for a small number + /// of instructions. + /// + /// OrigRegs is a vector of registers that were originally used by the + /// instructions in the range between the two iterators. + /// + /// Currently, the only only changes that are supported are simple removal + /// and addition of uses. + void repairIntervalsInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + ArrayRef OrigRegs); + // Register mask functions. // // Machine instructions may use a register mask operand to indicate that a @@ -347,37 +367,17 @@ namespace llvm { return RegUnitIntervals[Unit]; } - private: - /// computeIntervals - Compute live intervals. - void computeIntervals(); + const LiveInterval *getCachedRegUnit(unsigned Unit) const { + return RegUnitIntervals[Unit]; + } + private: /// Compute live intervals for all virtual registers. void computeVirtRegs(); /// Compute RegMaskSlots and RegMaskBits. void computeRegMasks(); - /// handleRegisterDef - update intervals for a register def - /// (calls handleVirtualRegisterDef) - void handleRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MI, - SlotIndex MIIdx, - MachineOperand& MO, unsigned MOIdx); - - /// isPartialRedef - Return true if the specified def at the specific index - /// is partially re-defining the specified live interval. A common case of - /// this is a definition of the sub-register. 
- bool isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, - LiveInterval &interval); - - /// handleVirtualRegisterDef - update intervals for a virtual - /// register def - void handleVirtualRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MI, - SlotIndex MIIdx, MachineOperand& MO, - unsigned MOIdx, - LiveInterval& interval); - static LiveInterval* createInterval(unsigned Reg); void printInstrs(raw_ostream &O) const; diff --git a/lib/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h similarity index 95% rename from lib/CodeGen/LiveIntervalUnion.h rename to include/llvm/CodeGen/LiveIntervalUnion.h index 4d41fca85ad3..615b339bd79c 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/include/llvm/CodeGen/LiveIntervalUnion.h @@ -14,15 +14,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVEINTERVALUNION -#define LLVM_CODEGEN_LIVEINTERVALUNION +#ifndef LLVM_CODEGEN_LIVEINTERVALUNION_H +#define LLVM_CODEGEN_LIVEINTERVALUNION_H #include "llvm/ADT/IntervalMap.h" #include "llvm/CodeGen/LiveInterval.h" namespace llvm { -class MachineLoopRange; class TargetRegisterInfo; #ifndef NDEBUG @@ -173,10 +172,6 @@ public: return InterferingVRegs; } - /// checkLoopInterference - Return true if there is interference overlapping - /// Loop. 
- bool checkLoopInterference(MachineLoopRange*); - private: Query(const Query&) LLVM_DELETED_FUNCTION; void operator=(const Query&) LLVM_DELETED_FUNCTION; @@ -207,4 +202,4 @@ public: } // end namespace llvm -#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION) +#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION_H) diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index def7b00ce761..8a32a3c11a82 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -83,7 +83,7 @@ private: /// allUsesAvailableAt - Return true if all registers used by OrigMI at /// OrigIdx are also available with the same value at UseIdx. bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx); + SlotIndex UseIdx) const; /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. diff --git a/lib/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h similarity index 99% rename from lib/CodeGen/LiveRegMatrix.h rename to include/llvm/CodeGen/LiveRegMatrix.h index 8f22c24478f4..7a3e9e8347f4 100644 --- a/lib/CodeGen/LiveRegMatrix.h +++ b/include/llvm/CodeGen/LiveRegMatrix.h @@ -24,9 +24,9 @@ #ifndef LLVM_CODEGEN_LIVEREGMATRIX_H #define LLVM_CODEGEN_LIVEREGMATRIX_H -#include "LiveIntervalUnion.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h index 86c4d7c11067..92c35f784d4c 100644 --- a/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/include/llvm/CodeGen/LiveStackAnalysis.h @@ -13,13 +13,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVESTACK_ANALYSIS_H -#define LLVM_CODEGEN_LIVESTACK_ANALYSIS_H +#ifndef 
LLVM_CODEGEN_LIVESTACKANALYSIS_H +#define LLVM_CODEGEN_LIVESTACKANALYSIS_H -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Target/TargetRegisterInfo.h" #include namespace llvm { diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h index 3bb134b8fb2a..6628fd278e45 100644 --- a/include/llvm/CodeGen/LiveVariables.h +++ b/include/llvm/CodeGen/LiveVariables.h @@ -29,21 +29,19 @@ #ifndef LLVM_CODEGEN_LIVEVARIABLES_H #define LLVM_CODEGEN_LIVEVARIABLES_H -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { +class MachineBasicBlock; class MachineRegisterInfo; -class TargetRegisterInfo; class LiveVariables : public MachineFunctionPass { public: diff --git a/include/llvm/CodeGen/MachORelocation.h b/include/llvm/CodeGen/MachORelocation.h index 21fe74f8e1cd..8c9b7a84e5b8 100644 --- a/include/llvm/CodeGen/MachORelocation.h +++ b/include/llvm/CodeGen/MachORelocation.h @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_MACHO_RELOCATION_H -#define LLVM_CODEGEN_MACHO_RELOCATION_H +#ifndef LLVM_CODEGEN_MACHORELOCATION_H +#define LLVM_CODEGEN_MACHORELOCATION_H #include "llvm/Support/DataTypes.h" @@ -53,4 +53,4 @@ namespace llvm { } // end llvm namespace -#endif // LLVM_CODEGEN_MACHO_RELOCATION_H +#endif // LLVM_CODEGEN_MACHORELOCATION_H 
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 97c39458d93d..492a3ff49f8c 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -14,8 +14,8 @@ #ifndef LLVM_CODEGEN_MACHINEBASICBLOCK_H #define LLVM_CODEGEN_MACHINEBASICBLOCK_H -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/DataTypes.h" #include @@ -146,11 +146,11 @@ public: bundle_iterator(IterTy mii) : MII(mii) {} bundle_iterator(Ty &mi) : MII(mi) { - assert(!mi.isInsideBundle() && + assert(!mi.isBundledWithPred() && "It's not legal to initialize bundle_iterator with a bundled MI"); } bundle_iterator(Ty *mi) : MII(mi) { - assert((!mi || !mi->isInsideBundle()) && + assert((!mi || !mi->isBundledWithPred()) && "It's not legal to initialize bundle_iterator with a bundled MI"); } // Template allows conversion from const to nonconst. @@ -174,13 +174,13 @@ public: // Increment and decrement operators... bundle_iterator &operator--() { // predecrement - Back up do --MII; - while (MII->isInsideBundle()); + while (MII->isBundledWithPred()); return *this; } bundle_iterator &operator++() { // preincrement - Advance - IterTy E = MII->getParent()->instr_end(); - do ++MII; - while (MII != E && MII->isInsideBundle()); + while (MII->isBundledWithSucc()) + ++MII; + ++MII; return *this; } bundle_iterator operator--(int) { // postdecrement operators... @@ -441,80 +441,107 @@ public: void pop_back() { Insts.pop_back(); } void push_back(MachineInstr *MI) { Insts.push_back(MI); } - template - void insert(instr_iterator I, IT S, IT E) { - Insts.insert(I, S, E); - } - instr_iterator insert(instr_iterator I, MachineInstr *M) { - return Insts.insert(I, M); - } - instr_iterator insertAfter(instr_iterator I, MachineInstr *M) { - return Insts.insertAfter(I, M); - } + /// Insert MI into the instruction list before I, possibly inside a bundle. 
+ /// + /// If the insertion point is inside a bundle, MI will be added to the bundle, + /// otherwise MI will not be added to any bundle. That means this function + /// alone can't be used to prepend or append instructions to bundles. See + /// MIBundleBuilder::insert() for a more reliable way of doing that. + instr_iterator insert(instr_iterator I, MachineInstr *M); + /// Insert a range of instructions into the instruction list before I. template void insert(iterator I, IT S, IT E) { Insts.insert(I.getInstrIterator(), S, E); } - iterator insert(iterator I, MachineInstr *M) { - return Insts.insert(I.getInstrIterator(), M); - } - iterator insertAfter(iterator I, MachineInstr *M) { - return Insts.insertAfter(I.getInstrIterator(), M); + + /// Insert MI into the instruction list before I. + iterator insert(iterator I, MachineInstr *MI) { + assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() && + "Cannot insert instruction with bundle flags"); + return Insts.insert(I.getInstrIterator(), MI); } - /// erase - Remove the specified element or range from the instruction list. - /// These functions delete any instructions removed. + /// Insert MI into the instruction list after I. + iterator insertAfter(iterator I, MachineInstr *MI) { + assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() && + "Cannot insert instruction with bundle flags"); + return Insts.insertAfter(I.getInstrIterator(), MI); + } + + /// Remove an instruction from the instruction list and delete it. /// - instr_iterator erase(instr_iterator I) { - return Insts.erase(I); - } - instr_iterator erase(instr_iterator I, instr_iterator E) { - return Insts.erase(I, E); - } + /// If the instruction is part of a bundle, the other instructions in the + /// bundle will still be bundled after removing the single instruction. + instr_iterator erase(instr_iterator I); + + /// Remove an instruction from the instruction list and delete it. 
+ /// + /// If the instruction is part of a bundle, the other instructions in the + /// bundle will still be bundled after removing the single instruction. instr_iterator erase_instr(MachineInstr *I) { - instr_iterator MII(I); - return erase(MII); + return erase(instr_iterator(I)); } - iterator erase(iterator I); + /// Remove a range of instructions from the instruction list and delete them. iterator erase(iterator I, iterator E) { return Insts.erase(I.getInstrIterator(), E.getInstrIterator()); } - iterator erase(MachineInstr *I) { - iterator MII(I); - return erase(MII); + + /// Remove an instruction or bundle from the instruction list and delete it. + /// + /// If I points to a bundle of instructions, they are all erased. + iterator erase(iterator I) { + return erase(I, llvm::next(I)); } - /// remove - Remove the instruction from the instruction list. This function - /// does not delete the instruction. WARNING: Note, if the specified - /// instruction is a bundle this function will remove all the bundled - /// instructions as well. It is up to the caller to keep a list of the - /// bundled instructions and re-insert them if desired. This function is - /// *not recommended* for manipulating instructions with bundles. Use - /// splice instead. - MachineInstr *remove(MachineInstr *I); + /// Remove an instruction from the instruction list and delete it. + /// + /// If I is the head of a bundle of instructions, the whole bundle will be + /// erased. + iterator erase(MachineInstr *I) { + return erase(iterator(I)); + } + + /// Remove the unbundled instruction from the instruction list without + /// deleting it. + /// + /// This function can not be used to remove bundled instructions, use + /// remove_instr to remove individual instructions from a bundle. 
+ MachineInstr *remove(MachineInstr *I) { + assert(!I->isBundled() && "Cannot remove bundled instructions"); + return Insts.remove(I); + } + + /// Remove the possibly bundled instruction from the instruction list + /// without deleting it. + /// + /// If the instruction is part of a bundle, the other instructions in the + /// bundle will still be bundled after removing the single instruction. + MachineInstr *remove_instr(MachineInstr *I); + void clear() { Insts.clear(); } - /// splice - Take an instruction from MBB 'Other' at the position From, - /// and insert it into this MBB right before 'where'. - void splice(instr_iterator where, MachineBasicBlock *Other, - instr_iterator From) { - Insts.splice(where, Other->Insts, From); + /// Take an instruction from MBB 'Other' at the position From, and insert it + /// into this MBB right before 'Where'. + /// + /// If From points to a bundle of instructions, the whole bundle is moved. + void splice(iterator Where, MachineBasicBlock *Other, iterator From) { + // The range splice() doesn't allow noop moves, but this one does. + if (Where != From) + splice(Where, Other, From, llvm::next(From)); } - void splice(iterator where, MachineBasicBlock *Other, iterator From); - /// splice - Take a block of instructions from MBB 'Other' in the range [From, - /// To), and insert them into this MBB right before 'where'. - void splice(instr_iterator where, MachineBasicBlock *Other, instr_iterator From, - instr_iterator To) { - Insts.splice(where, Other->Insts, From, To); - } - void splice(iterator where, MachineBasicBlock *Other, iterator From, - iterator To) { - Insts.splice(where.getInstrIterator(), Other->Insts, + /// Take a block of instructions from MBB 'Other' in the range [From, To), + /// and insert them into this MBB right before 'Where'. + /// + /// The instruction at 'Where' must not be included in the range of + /// instructions to move. 
+ void splice(iterator Where, MachineBasicBlock *Other, + iterator From, iterator To) { + Insts.splice(Where.getInstrIterator(), Other->Insts, From.getInstrIterator(), To.getInstrIterator()); } diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h index 12189ceb7f16..98dd03b45cf7 100644 --- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h +++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h @@ -1,4 +1,3 @@ - //==- MachineBranchProbabilityInfo.h - Machine Branch Probability Analysis -==// // // The LLVM Compiler Infrastructure @@ -15,8 +14,8 @@ #ifndef LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H #define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H -#include "llvm/Pass.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h index 86e8f27877e2..9e41e6e9c1ee 100644 --- a/include/llvm/CodeGen/MachineCodeEmitter.h +++ b/include/llvm/CodeGen/MachineCodeEmitter.h @@ -19,7 +19,6 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/DebugLoc.h" - #include namespace llvm { diff --git a/include/llvm/CodeGen/MachineCodeInfo.h b/include/llvm/CodeGen/MachineCodeInfo.h index c5c0c4450454..ba9dfab91a7e 100644 --- a/include/llvm/CodeGen/MachineCodeInfo.h +++ b/include/llvm/CodeGen/MachineCodeInfo.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef EE_MACHINE_CODE_INFO_H -#define EE_MACHINE_CODE_INFO_H +#ifndef LLVM_CODEGEN_MACHINECODEINFO_H +#define LLVM_CODEGEN_MACHINECODEINFO_H #include "llvm/Support/DataTypes.h" diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 82a4ac821b69..e41d206da65c 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -15,11 +15,11 @@ #ifndef 
LLVM_CODEGEN_MACHINEDOMINATORS_H #define LLVM_CODEGEN_MACHINEDOMINATORS_H +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/DominatorInternals.h" namespace llvm { @@ -41,15 +41,15 @@ class MachineDominatorTree : public MachineFunctionPass { public: static char ID; // Pass ID, replacement for typeid DominatorTreeBase* DT; - + MachineDominatorTree(); - + ~MachineDominatorTree(); - + DominatorTreeBase& getBase() { return *DT; } - + virtual void getAnalysisUsage(AnalysisUsage &AU) const; - + /// getRoots - Return the root blocks of the current CFG. This may include /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). @@ -57,33 +57,35 @@ public: inline const std::vector &getRoots() const { return DT->getRoots(); } - + inline MachineBasicBlock *getRoot() const { return DT->getRoot(); } - + inline MachineDomTreeNode *getRootNode() const { return DT->getRootNode(); } - + virtual bool runOnMachineFunction(MachineFunction &F); - - inline bool dominates(MachineDomTreeNode* A, MachineDomTreeNode* B) const { + + inline bool dominates(const MachineDomTreeNode* A, + const MachineDomTreeNode* B) const { return DT->dominates(A, B); } - - inline bool dominates(MachineBasicBlock* A, MachineBasicBlock* B) const { + + inline bool dominates(const MachineBasicBlock* A, + const MachineBasicBlock* B) const { return DT->dominates(A, B); } - + // dominates - Return true if A dominates B. This performs the // special checks necessary if A and B are in the same basic block. 
- bool dominates(MachineInstr *A, MachineInstr *B) const { - MachineBasicBlock *BBA = A->getParent(), *BBB = B->getParent(); + bool dominates(const MachineInstr *A, const MachineInstr *B) const { + const MachineBasicBlock *BBA = A->getParent(), *BBB = B->getParent(); if (BBA != BBB) return DT->dominates(BBA, BBB); // Loop through the basic block until we find A or B. - MachineBasicBlock::iterator I = BBA->begin(); + MachineBasicBlock::const_iterator I = BBA->begin(); for (; &*I != A && &*I != B; ++I) /*empty*/ ; @@ -95,43 +97,43 @@ public: // return &*I == B; //} } - + inline bool properlyDominates(const MachineDomTreeNode* A, - MachineDomTreeNode* B) const { + const MachineDomTreeNode* B) const { return DT->properlyDominates(A, B); } - - inline bool properlyDominates(MachineBasicBlock* A, - MachineBasicBlock* B) const { + + inline bool properlyDominates(const MachineBasicBlock* A, + const MachineBasicBlock* B) const { return DT->properlyDominates(A, B); } - + /// findNearestCommonDominator - Find nearest common dominator basic block /// for basic block A and B. If there is no such block then return NULL. inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) { return DT->findNearestCommonDominator(A, B); } - + inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { return DT->getNode(BB); } - + /// getNode - return the (Post)DominatorTree node for the specified basic /// block. This is the same as using operator[] on this class. /// inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { return DT->getNode(BB); } - + /// addNewBlock - Add a new node to the dominator tree information. This - /// creates a new node as a child of DomBB dominator node,linking it into + /// creates a new node as a child of DomBB dominator node,linking it into /// the children list of the immediate dominator. 
inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB) { return DT->addNewBlock(BB, DomBB); } - + /// changeImmediateDominator - This method is used to update the dominator /// tree information when a node's immediate dominator changes. /// @@ -139,19 +141,19 @@ public: MachineBasicBlock* NewIDom) { DT->changeImmediateDominator(N, NewIDom); } - + inline void changeImmediateDominator(MachineDomTreeNode *N, MachineDomTreeNode* NewIDom) { DT->changeImmediateDominator(N, NewIDom); } - + /// eraseNode - Removes a node from the dominator tree. Block must not /// dominate any other blocks. Removes node from its immediate dominator's /// children list. Deletes dominator node associated with basic block BB. inline void eraseNode(MachineBasicBlock *BB) { DT->eraseNode(BB); } - + /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. inline void splitBlock(MachineBasicBlock* NewBB) { @@ -160,12 +162,12 @@ public: /// isReachableFromEntry - Return true if A is dominated by the entry /// block of the function containing it. - bool isReachableFromEntry(MachineBasicBlock *A) { + bool isReachableFromEntry(const MachineBasicBlock *A) { return DT->isReachableFromEntry(A); } virtual void releaseMemory(); - + virtual void print(raw_ostream &OS, const Module*) const; }; @@ -179,7 +181,7 @@ template struct GraphTraits; template <> struct GraphTraits { typedef MachineDomTreeNode NodeType; typedef NodeType::iterator ChildIteratorType; - + static NodeType *getEntryNode(NodeType *N) { return N; } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 0e4e132e40d9..cdec7e663708 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -221,8 +221,11 @@ class MachineFrameInfo { /// just allocate them normally. bool UseLocalStackAllocationBlock; + /// Whether the "realign-stack" option is on. 
+ bool RealignOption; public: - explicit MachineFrameInfo(const TargetFrameLowering &tfi) : TFI(tfi) { + explicit MachineFrameInfo(const TargetFrameLowering &tfi, bool RealignOpt) + : TFI(tfi), RealignOption(RealignOpt) { StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0; HasVarSizedObjects = false; FrameAddressTaken = false; @@ -416,6 +419,9 @@ public: /// void setStackSize(uint64_t Size) { StackSize = Size; } + /// Estimate and return the size of the stack frame. + unsigned estimateStackSize(const MachineFunction &MF) const; + /// getOffsetAdjustment - Return the correction for frame offsets. /// int getOffsetAdjustment() const { return OffsetAdjustment; } @@ -432,9 +438,7 @@ public: /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. - void ensureMaxAlignment(unsigned Align) { - if (MaxAlignment < Align) MaxAlignment = Align; - } + void ensureMaxAlignment(unsigned Align); /// AdjustsStack - Return true if this function adjusts the stack -- e.g., /// when calling another function. This is only valid during and after @@ -496,26 +500,13 @@ public: /// a nonnegative identifier to represent it. /// int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, - bool MayNeedSP = false, const AllocaInst *Alloca = 0) { - assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, - Alloca)); - int Index = (int)Objects.size() - NumFixedObjects - 1; - assert(Index >= 0 && "Bad frame index!"); - ensureMaxAlignment(Alignment); - return Index; - } + bool MayNeedSP = false, const AllocaInst *Alloca = 0); /// CreateSpillStackObject - Create a new statically sized stack object that /// represents a spill slot, returning a nonnegative identifier to represent /// it. 
/// - int CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - CreateStackObject(Size, Alignment, true, false); - int Index = (int)Objects.size() - NumFixedObjects - 1; - ensureMaxAlignment(Alignment); - return Index; - } + int CreateSpillStackObject(uint64_t Size, unsigned Alignment); /// RemoveStackObject - Remove or mark dead a statically sized stack object. /// @@ -529,12 +520,7 @@ public: /// variable sized object is created, whether or not the index returned is /// actually used. /// - int CreateVariableSizedObject(unsigned Alignment) { - HasVarSizedObjects = true; - Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); - ensureMaxAlignment(Alignment); - return (int)Objects.size()-NumFixedObjects-1; - } + int CreateVariableSizedObject(unsigned Alignment); /// getCalleeSavedInfo - Returns a reference to call saved info vector for the /// current function. diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 025e18a9dde0..82c4cd659840 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -18,10 +18,11 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTION_H #define LLVM_CODEGEN_MACHINEFUNCTION_H -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/ilist.h" -#include "llvm/Support/DebugLoc.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/DebugLoc.h" #include "llvm/Support/Recycler.h" namespace llvm { @@ -105,6 +106,9 @@ class MachineFunction { // Allocation management for instructions in function. Recycler InstructionRecycler; + // Allocation management for operand arrays on instructions. + ArrayRecycler OperandRecycler; + // Allocation management for basic blocks in function. Recycler BasicBlockRecycler; @@ -127,6 +131,9 @@ class MachineFunction { /// about the control flow of such functions. 
bool ExposesReturnsTwice; + /// True if the function includes MS-style inline assembly. + bool HasMSInlineAsm; + MachineFunction(const MachineFunction &) LLVM_DELETED_FUNCTION; void operator=(const MachineFunction&) LLVM_DELETED_FUNCTION; public: @@ -210,6 +217,17 @@ public: void setExposesReturnsTwice(bool B) { ExposesReturnsTwice = B; } + + /// Returns true if the function contains any MS-style inline assembly. + bool hasMSInlineAsm() const { + return HasMSInlineAsm; + } + + /// Set a flag that indicates that the function contains MS-style inline + /// assembly. + void setHasMSInlineAsm(bool B) { + HasMSInlineAsm = B; + } /// getInfo - Keep track of various per-function pieces of information for /// backends that would like to do so. @@ -394,6 +412,21 @@ public: MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size); + typedef ArrayRecycler::Capacity OperandCapacity; + + /// Allocate an array of MachineOperands. This is only intended for use by + /// internal MachineInstr functions. + MachineOperand *allocateOperandArray(OperandCapacity Cap) { + return OperandRecycler.allocate(Cap, Allocator); + } + + /// Deallocate an array of MachineOperands and recycle the memory. This is + /// only intended for use by internal MachineInstr functions. + /// Cap must be the same capacity that was used to allocate the array. + void deallocateOperandArray(OperandCapacity Cap, MachineOperand *Array) { + OperandRecycler.deallocate(Cap, Array); + } + /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand /// pointers. This array is owned by the MachineFunction.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num); diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h index 50ea2062f30c..112f07ea50d7 100644 --- a/include/llvm/CodeGen/MachineFunctionAnalysis.h +++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H -#define LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H +#ifndef LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS_H +#define LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS_H #include "llvm/Pass.h" -#include "llvm/Target/TargetMachine.h" namespace llvm { class MachineFunction; +class TargetMachine; /// MachineFunctionAnalysis - This class is a Pass that manages a /// MachineFunction object. diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h index b7bf0a36c447..04881e52ca7f 100644 --- a/include/llvm/CodeGen/MachineFunctionPass.h +++ b/include/llvm/CodeGen/MachineFunctionPass.h @@ -16,8 +16,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H -#define LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H +#ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H +#define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H #include "llvm/Pass.h" diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 7eb03a93012d..195cce7a64d7 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -16,17 +16,18 @@ #ifndef LLVM_CODEGEN_MACHINEINSTR_H #define LLVM_CODEGEN_MACHINEINSTR_H -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/Target/TargetOpcodes.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/InlineAsm.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/ArrayRecycler.h" #include "llvm/Support/DebugLoc.h" +#include "llvm/Target/TargetOpcodes.h" #include namespace llvm { @@ -42,6 +43,10 @@ class MachineMemOperand; //===----------------------------------------------------------------------===// /// MachineInstr - Representation of each machine instruction. /// +/// This class isn't a POD type, but it must have a trivial destructor. When a +/// MachineFunction is deleted, all the contained MachineInstrs are deallocated +/// without having their destructor called. +/// class MachineInstr : public ilist_node { public: typedef MachineMemOperand **mmo_iterator; @@ -58,11 +63,18 @@ public: NoFlags = 0, FrameSetup = 1 << 0, // Instruction is used as a part of // function frame setup code. - InsideBundle = 1 << 1 // Instruction is inside a bundle (not - // the first MI in a bundle) + BundledPred = 1 << 1, // Instruction has bundled predecessors. + BundledSucc = 1 << 2 // Instruction has bundled successors. }; private: const MCInstrDesc *MCID; // Instruction descriptor. + MachineBasicBlock *Parent; // Pointer to the owning basic block. + + // Operands are allocated by an ArrayRecycler. + MachineOperand *Operands; // Pointer to the first operand. + unsigned NumOperands; // Number of operands on instruction. + typedef ArrayRecycler::Capacity OperandCapacity; + OperandCapacity CapOperands; // Capacity of the Operands array. uint8_t Flags; // Various bits of additional // information about machine @@ -75,15 +87,15 @@ private: // anything other than to convey comment // information to AsmPrinter. - uint16_t NumMemRefs; // information on memory references + uint8_t NumMemRefs; // Information on memory references. 
mmo_iterator MemRefs; - std::vector Operands; // the operands - MachineBasicBlock *Parent; // Pointer to the owning basic block. DebugLoc debugLoc; // Source line information. MachineInstr(const MachineInstr&) LLVM_DELETED_FUNCTION; void operator=(const MachineInstr&) LLVM_DELETED_FUNCTION; + // Use MachineFunction::DeleteMachineInstr() instead. + ~MachineInstr() LLVM_DELETED_FUNCTION; // Intrusive list support friend struct ilist_traits; @@ -94,22 +106,11 @@ private: /// MachineInstr in the given MachineFunction. MachineInstr(MachineFunction &, const MachineInstr &); - /// MachineInstr ctor - This constructor creates a dummy MachineInstr with - /// MCID NULL and no operands. - MachineInstr(); - /// MachineInstr ctor - This constructor create a MachineInstr and add the /// implicit operands. It reserves space for number of operands specified by /// MCInstrDesc. An explicit DebugLoc is supplied. - MachineInstr(const MCInstrDesc &MCID, const DebugLoc dl, bool NoImp = false); - - /// MachineInstr ctor - Work exactly the same as the ctor above, except that - /// the MachineInstr is created and added to the end of the specified basic - /// block. - MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, - const MCInstrDesc &MCID); - - ~MachineInstr(); + MachineInstr(MachineFunction&, const MCInstrDesc &MCID, + const DebugLoc dl, bool NoImp = false); // MachineInstrs are pool-allocated and owned by MachineFunction. friend class MachineFunction; @@ -160,7 +161,9 @@ public: } void setFlags(unsigned flags) { - Flags = flags; + // Filter out the automatically maintained flags. + unsigned Mask = BundledPred | BundledSucc; + Flags = (Flags & Mask) | (flags & ~Mask); } /// clearFlag - Clear a MI flag. @@ -205,21 +208,36 @@ public: /// The first instruction has the special opcode "BUNDLE". It's not "inside" /// a bundle, but the next three MIs are. bool isInsideBundle() const { - return getFlag(InsideBundle); - } - - /// setIsInsideBundle - Set InsideBundle bit. 
- /// - void setIsInsideBundle(bool Val = true) { - if (Val) - setFlag(InsideBundle); - else - clearFlag(InsideBundle); + return getFlag(BundledPred); } /// isBundled - Return true if this instruction part of a bundle. This is true /// if either itself or its following instruction is marked "InsideBundle". - bool isBundled() const; + bool isBundled() const { + return isBundledWithPred() || isBundledWithSucc(); + } + + /// Return true if this instruction is part of a bundle, and it is not the + /// first instruction in the bundle. + bool isBundledWithPred() const { return getFlag(BundledPred); } + + /// Return true if this instruction is part of a bundle, and it is not the + /// last instruction in the bundle. + bool isBundledWithSucc() const { return getFlag(BundledSucc); } + + /// Bundle this instruction with its predecessor. This can be an unbundled + /// instruction, or it can be the first instruction in a bundle. + void bundleWithPred(); + + /// Bundle this instruction with its successor. This can be an unbundled + /// instruction, or it can be the last instruction in a bundle. + void bundleWithSucc(); + + /// Break bundle above this instruction. + void unbundleFromPred(); + + /// Break bundle below this instruction. + void unbundleFromSucc(); /// getDebugLoc - Returns the debug location id of this MachineInstr. /// @@ -244,7 +262,7 @@ public: /// Access to explicit operands of the instruction. /// - unsigned getNumOperands() const { return (unsigned)Operands.size(); } + unsigned getNumOperands() const { return NumOperands; } const MachineOperand& getOperand(unsigned i) const { assert(i < getNumOperands() && "getOperand() out of range!"); @@ -260,14 +278,14 @@ public: unsigned getNumExplicitOperands() const; /// iterator/begin/end - Iterate over all operands of a machine instruction. 
- typedef std::vector::iterator mop_iterator; - typedef std::vector::const_iterator const_mop_iterator; + typedef MachineOperand *mop_iterator; + typedef const MachineOperand *const_mop_iterator; - mop_iterator operands_begin() { return Operands.begin(); } - mop_iterator operands_end() { return Operands.end(); } + mop_iterator operands_begin() { return Operands; } + mop_iterator operands_end() { return Operands + NumOperands; } - const_mop_iterator operands_begin() const { return Operands.begin(); } - const_mop_iterator operands_end() const { return Operands.end(); } + const_mop_iterator operands_begin() const { return Operands; } + const_mop_iterator operands_end() const { return Operands + NumOperands; } /// Access to memory operands of the instruction mmo_iterator memoperands_begin() const { return MemRefs; } @@ -295,11 +313,11 @@ public: /// The second argument indicates whether the query should look inside /// instruction bundles. bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const { - // Inline the fast path. - if (Type == IgnoreBundle || !isBundle()) + // Inline the fast path for unbundled or bundle-internal instructions. + if (Type == IgnoreBundle || !isBundled() || isBundledWithPred()) return getDesc().getFlags() & (1 << MCFlag); - // If we have a bundle, take the slow path. + // If this is the first instruction in a bundle, take the slow path. return hasPropertyInBundle(1 << MCFlag, Type); } @@ -578,14 +596,33 @@ public: bool isIdenticalTo(const MachineInstr *Other, MICheckType Check = CheckDefs) const; - /// removeFromParent - This method unlinks 'this' from the containing basic - /// block, and returns it, but does not delete it. + /// Unlink 'this' from the containing basic block, and return it without + /// deleting it. + /// + /// This function can not be used on bundled instructions, use + /// removeFromBundle() to remove individual instructions from a bundle. 
MachineInstr *removeFromParent(); - /// eraseFromParent - This method unlinks 'this' from the containing basic - /// block and deletes it. + /// Unlink this instruction from its basic block and return it without + /// deleting it. + /// + /// If the instruction is part of a bundle, the other instructions in the + /// bundle remain bundled. + MachineInstr *removeFromBundle(); + + /// Unlink 'this' from the containing basic block and delete it. + /// + /// If this instruction is the header of a bundle, the whole bundle is erased. + /// This function can not be used for instructions inside a bundle, use + /// eraseFromBundle() to erase individual bundled instructions. void eraseFromParent(); + /// Unlink 'this' from its basic block and delete it. + /// + /// If the instruction is part of a bundle, the other instructions in the + /// bundle remain bundled. + void eraseFromBundle(); + /// isLabel - Returns true if the MachineInstr represents a label. /// bool isLabel() const { @@ -605,6 +642,9 @@ public: bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; } + bool isMSInlineAsm() const { + return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect(); + } bool isStackAligningInlineAsm() const; InlineAsm::AsmDialect getInlineAsmDialect() const; bool isInsertSubreg() const { @@ -662,7 +702,11 @@ public: } } - /// getBundleSize - Return the number of instructions inside the MI bundle. + /// Return the number of instructions inside the MI bundle, excluding the + /// bundle header. + /// + /// This is the number of instructions that MachineBasicBlock::iterator + /// skips, 0 for unbundled instructions.
unsigned getBundleSize() const; /// readsRegister - Return true if the MachineInstr reads the specified @@ -821,13 +865,6 @@ public: /// void clearKillInfo(); - /// copyKillDeadInfo - Copies kill / dead operand properties from MI. - /// - void copyKillDeadInfo(const MachineInstr *MI); - - /// copyPredicates - Copies predicate operand(s) from MI. - void copyPredicates(const MachineInstr *MI); - /// substituteRegister - Replace all occurrences of FromReg with ToReg:SubIdx, /// properly composing subreg indices where necessary. void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, @@ -909,21 +946,35 @@ public: /// copyImplicitOps - Copy implicit register operands from specified /// instruction to this instruction. - void copyImplicitOps(const MachineInstr *MI); + void copyImplicitOps(MachineFunction &MF, const MachineInstr *MI); // // Debugging support // - void print(raw_ostream &OS, const TargetMachine *TM = 0) const; + void print(raw_ostream &OS, const TargetMachine *TM = 0, + bool SkipOpers = false) const; void dump() const; //===--------------------------------------------------------------------===// // Accessors used to build up machine instructions. - /// addOperand - Add the specified operand to the instruction. If it is an - /// implicit operand, it is added to the end of the operand list. If it is - /// an explicit operand it is added at the end of the explicit operand list + /// Add the specified operand to the instruction. If it is an implicit + /// operand, it is added to the end of the operand list. If it is an + /// explicit operand it is added at the end of the explicit operand list /// (before the first implicit operand). + /// + /// MF must be the machine function that was used to allocate this + /// instruction. + /// + /// MachineInstrBuilder provides a more convenient interface for creating + /// instructions and adding operands. 
+ void addOperand(MachineFunction &MF, const MachineOperand &Op); + + /// Add an operand without providing an MF reference. This only works for + /// instructions that are inserted in a basic block. + /// + /// MachineInstrBuilder and the two-argument addOperand(MF, MO) should be + /// preferred. void addOperand(const MachineOperand &Op); /// setDesc - Replace the instruction descriptor (thus opcode) of @@ -950,7 +1001,8 @@ public: /// list. This does not transfer ownership. void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) { MemRefs = NewMemRefs; - NumMemRefs = NewMemRefsEnd - NewMemRefs; + NumMemRefs = uint8_t(NewMemRefsEnd - NewMemRefs); + assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs"); } private: @@ -970,7 +1022,7 @@ private: /// addImplicitDefUseOperands - Add all implicit def and use operands to /// this instruction. - void addImplicitDefUseOperands(); + void addImplicitDefUseOperands(MachineFunction &MF); /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. This requires that the diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 770685358aba..92c8da991ca4 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -18,6 +18,7 @@ #define LLVM_CODEGEN_MACHINEINSTRBUILDER_H #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/Support/ErrorHandling.h" namespace llvm { @@ -42,10 +43,14 @@ namespace RegState { } class MachineInstrBuilder { + MachineFunction *MF; MachineInstr *MI; public: - MachineInstrBuilder() : MI(0) {} - explicit MachineInstrBuilder(MachineInstr *mi) : MI(mi) {} + MachineInstrBuilder() : MF(0), MI(0) {} + + /// Create a MachineInstrBuilder for manipulating an existing instruction. + /// F must be the machine function that was used to allocate I. 
+ MachineInstrBuilder(MachineFunction &F, MachineInstr *I) : MF(&F), MI(I) {} /// Allow automatic conversion to the machine instruction we are working on. /// @@ -60,86 +65,94 @@ public: unsigned SubReg = 0) const { assert((flags & 0x1) == 0 && "Passing in 'true' to addReg is forbidden! Use enums instead."); - MI->addOperand(MachineOperand::CreateReg(RegNo, - flags & RegState::Define, - flags & RegState::Implicit, - flags & RegState::Kill, - flags & RegState::Dead, - flags & RegState::Undef, - flags & RegState::EarlyClobber, - SubReg, - flags & RegState::Debug, - flags & RegState::InternalRead)); + MI->addOperand(*MF, MachineOperand::CreateReg(RegNo, + flags & RegState::Define, + flags & RegState::Implicit, + flags & RegState::Kill, + flags & RegState::Dead, + flags & RegState::Undef, + flags & RegState::EarlyClobber, + SubReg, + flags & RegState::Debug, + flags & RegState::InternalRead)); return *this; } /// addImm - Add a new immediate operand. /// const MachineInstrBuilder &addImm(int64_t Val) const { - MI->addOperand(MachineOperand::CreateImm(Val)); + MI->addOperand(*MF, MachineOperand::CreateImm(Val)); return *this; } const MachineInstrBuilder &addCImm(const ConstantInt *Val) const { - MI->addOperand(MachineOperand::CreateCImm(Val)); + MI->addOperand(*MF, MachineOperand::CreateCImm(Val)); return *this; } const MachineInstrBuilder &addFPImm(const ConstantFP *Val) const { - MI->addOperand(MachineOperand::CreateFPImm(Val)); + MI->addOperand(*MF, MachineOperand::CreateFPImm(Val)); return *this; } const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags = 0) const { - MI->addOperand(MachineOperand::CreateMBB(MBB, TargetFlags)); + MI->addOperand(*MF, MachineOperand::CreateMBB(MBB, TargetFlags)); return *this; } const MachineInstrBuilder &addFrameIndex(int Idx) const { - MI->addOperand(MachineOperand::CreateFI(Idx)); + MI->addOperand(*MF, MachineOperand::CreateFI(Idx)); return *this; } const MachineInstrBuilder 
&addConstantPoolIndex(unsigned Idx, int Offset = 0, unsigned char TargetFlags = 0) const { - MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, TargetFlags)); + MI->addOperand(*MF, MachineOperand::CreateCPI(Idx, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0, unsigned char TargetFlags = 0) const { - MI->addOperand(MachineOperand::CreateTargetIndex(Idx, Offset, TargetFlags)); + MI->addOperand(*MF, MachineOperand::CreateTargetIndex(Idx, Offset, + TargetFlags)); return *this; } const MachineInstrBuilder &addJumpTableIndex(unsigned Idx, unsigned char TargetFlags = 0) const { - MI->addOperand(MachineOperand::CreateJTI(Idx, TargetFlags)); + MI->addOperand(*MF, MachineOperand::CreateJTI(Idx, TargetFlags)); return *this; } const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV, int64_t Offset = 0, unsigned char TargetFlags = 0) const { - MI->addOperand(MachineOperand::CreateGA(GV, Offset, TargetFlags)); + MI->addOperand(*MF, MachineOperand::CreateGA(GV, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addExternalSymbol(const char *FnName, unsigned char TargetFlags = 0) const { - MI->addOperand(MachineOperand::CreateES(FnName, TargetFlags)); + MI->addOperand(*MF, MachineOperand::CreateES(FnName, TargetFlags)); + return *this; + } + + const MachineInstrBuilder &addBlockAddress(const BlockAddress *BA, + int64_t Offset = 0, + unsigned char TargetFlags = 0) const { + MI->addOperand(*MF, MachineOperand::CreateBA(BA, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addRegMask(const uint32_t *Mask) const { - MI->addOperand(MachineOperand::CreateRegMask(Mask)); + MI->addOperand(*MF, MachineOperand::CreateRegMask(Mask)); return *this; } const MachineInstrBuilder &addMemOperand(MachineMemOperand *MMO) const { - MI->addMemOperand(*MI->getParent()->getParent(), MMO); + MI->addMemOperand(*MF, MMO); return *this; } @@ -151,17 +164,17 @@ public: const MachineInstrBuilder 
&addOperand(const MachineOperand &MO) const { - MI->addOperand(MO); + MI->addOperand(*MF, MO); return *this; } const MachineInstrBuilder &addMetadata(const MDNode *MD) const { - MI->addOperand(MachineOperand::CreateMetadata(MD)); + MI->addOperand(*MF, MachineOperand::CreateMetadata(MD)); return *this; } const MachineInstrBuilder &addSym(MCSymbol *Sym) const { - MI->addOperand(MachineOperand::CreateMCSymbol(Sym)); + MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym)); return *this; } @@ -196,6 +209,12 @@ public: } } } + + /// Copy all the implicit operands from OtherMI onto this one. + const MachineInstrBuilder &copyImplicitOps(const MachineInstr *OtherMI) { + MI->copyImplicitOps(*MF, OtherMI); + return *this; + } }; /// BuildMI - Builder interface. Specify how to create the initial instruction @@ -204,7 +223,7 @@ public: inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID) { - return MachineInstrBuilder(MF.CreateMachineInstr(MCID, DL)); + return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)); } /// BuildMI - This version of the builder sets up the first operand as a @@ -214,7 +233,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID, unsigned DestReg) { - return MachineInstrBuilder(MF.CreateMachineInstr(MCID, DL)) + return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)) .addReg(DestReg, RegState::Define); } @@ -227,9 +246,10 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, DebugLoc DL, const MCInstrDesc &MCID, unsigned DestReg) { - MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL); + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); - return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -237,9 +257,10 @@ inline MachineInstrBuilder
BuildMI(MachineBasicBlock &BB, DebugLoc DL, const MCInstrDesc &MCID, unsigned DestReg) { - MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL); + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); - return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define); + return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -264,18 +285,20 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, DebugLoc DL, const MCInstrDesc &MCID) { - MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL); + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); - return MachineInstrBuilder(MI); + return MachineInstrBuilder(MF, MI); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, DebugLoc DL, const MCInstrDesc &MCID) { - MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL); + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); - return MachineInstrBuilder(MI); + return MachineInstrBuilder(MF, MI); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -330,6 +353,94 @@ inline unsigned getUndefRegState(bool B) { inline unsigned getInternalReadRegState(bool B) { return B ? RegState::InternalRead : 0; } +inline unsigned getDebugRegState(bool B) { + return B ? RegState::Debug : 0; +} + + +/// Helper class for constructing bundles of MachineInstrs. +/// +/// MIBundleBuilder can create a bundle from scratch by inserting new +/// MachineInstrs one at a time, or it can create a bundle from a sequence of +/// existing MachineInstrs in a basic block. 
+class MIBundleBuilder { + MachineBasicBlock &MBB; + MachineBasicBlock::instr_iterator Begin; + MachineBasicBlock::instr_iterator End; + +public: + /// Create an MIBundleBuilder that inserts instructions into a new bundle in + /// BB above the bundle or instruction at Pos. + MIBundleBuilder(MachineBasicBlock &BB, + MachineBasicBlock::iterator Pos) + : MBB(BB), Begin(Pos.getInstrIterator()), End(Begin) {} + + /// Create a bundle from the sequence of instructions between B and E. + MIBundleBuilder(MachineBasicBlock &BB, + MachineBasicBlock::iterator B, + MachineBasicBlock::iterator E) + : MBB(BB), Begin(B.getInstrIterator()), End(E.getInstrIterator()) { + assert(B != E && "No instructions to bundle"); + ++B; + while (B != E) { + MachineInstr *MI = B; + ++B; + MI->bundleWithPred(); + } + } + + /// Create an MIBundleBuilder representing an existing instruction or bundle + /// that has MI as its head. + explicit MIBundleBuilder(MachineInstr *MI) + : MBB(*MI->getParent()), Begin(MI), End(getBundleEnd(MI)) {} + + /// Return a reference to the basic block containing this bundle. + MachineBasicBlock &getMBB() const { return MBB; } + + /// Return true if no instructions have been inserted in this bundle yet. + /// Empty bundles aren't representable in a MachineBasicBlock. + bool empty() const { return Begin == End; } + + /// Return an iterator to the first bundled instruction. + MachineBasicBlock::instr_iterator begin() const { return Begin; } + + /// Return an iterator beyond the last bundled instruction. + MachineBasicBlock::instr_iterator end() const { return End; } + + /// Insert MI into this bundle before I which must point to an instruction in + /// the bundle, or end(). 
+ MIBundleBuilder &insert(MachineBasicBlock::instr_iterator I, + MachineInstr *MI) { + MBB.insert(I, MI); + if (I == Begin) { + if (!empty()) + MI->bundleWithSucc(); + Begin = MI; + return *this; + } + if (I == End) { + MI->bundleWithPred(); + return *this; + } + // MI was inserted in the middle of the bundle, so its neighbors' flags are + // already fine. Update MI's bundle flags manually. + MI->setFlag(MachineInstr::BundledPred); + MI->setFlag(MachineInstr::BundledSucc); + return *this; + } + + /// Insert MI into MBB by prepending it to the instructions in the bundle. + /// MI will become the first instruction in the bundle. + MIBundleBuilder &prepend(MachineInstr *MI) { + return insert(begin(), MI); + } + + /// Insert MI into MBB by appending it to the instructions in the bundle. + /// MI will become the last instruction in the bundle. + MIBundleBuilder &append(MachineInstr *MI) { + return insert(end(), MI); + } +}; } // End llvm namespace diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 854ba06209cd..9519edb3ebae 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -45,18 +45,36 @@ bool finalizeBundles(MachineFunction &MF); /// inline MachineInstr *getBundleStart(MachineInstr *MI) { MachineBasicBlock::instr_iterator I = MI; - while (I->isInsideBundle()) + while (I->isBundledWithPred()) --I; return I; } inline const MachineInstr *getBundleStart(const MachineInstr *MI) { MachineBasicBlock::const_instr_iterator I = MI; - while (I->isInsideBundle()) + while (I->isBundledWithPred()) --I; return I; } +/// Return an iterator pointing beyond the bundle containing MI. +inline MachineBasicBlock::instr_iterator +getBundleEnd(MachineInstr *MI) { + MachineBasicBlock::instr_iterator I = MI; + while (I->isBundledWithSucc()) + ++I; + return ++I; +} + +/// Return an iterator pointing beyond the bundle containing MI. 
+inline MachineBasicBlock::const_instr_iterator +getBundleEnd(const MachineInstr *MI) { + MachineBasicBlock::const_instr_iterator I = MI; + while (I->isBundledWithSucc()) + ++I; + return ++I; +} + //===----------------------------------------------------------------------===// // MachineOperand iterator // @@ -149,16 +167,13 @@ public: /// PhysRegInfo - Information about a physical register used by a set of /// operands. struct PhysRegInfo { - /// Clobbers - Reg or an overlapping register is defined, or a regmask + /// Clobbers - Reg or an overlapping register is defined, or a regmask /// clobbers Reg. bool Clobbers; /// Defines - Reg or a super-register is defined. bool Defines; - /// DefinesOverlap - Reg or an overlapping register is defined. - bool DefinesOverlap; - /// Reads - Read or a super-register is read. bool Reads; diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h index 928145d279b6..adcd1d0de63d 100644 --- a/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -20,8 +20,8 @@ #ifndef LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H #define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H -#include #include +#include namespace llvm { diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index d53f041128ac..b058ecb4c279 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -27,11 +27,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_MACHINE_LOOP_INFO_H -#define LLVM_CODEGEN_MACHINE_LOOP_INFO_H +#ifndef LLVM_CODEGEN_MACHINELOOPINFO_H +#define LLVM_CODEGEN_MACHINELOOPINFO_H -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { diff --git a/include/llvm/CodeGen/MachineLoopRanges.h b/include/llvm/CodeGen/MachineLoopRanges.h deleted file mode 100644 index 
6a30e8b53c09..000000000000 --- a/include/llvm/CodeGen/MachineLoopRanges.h +++ /dev/null @@ -1,112 +0,0 @@ -//===- MachineLoopRanges.h - Ranges of machine loops -----------*- c++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides the interface to the MachineLoopRanges analysis. -// -// Provide on-demand information about the ranges of machine instructions -// covered by a loop. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINELOOPRANGES_H -#define LLVM_CODEGEN_MACHINELOOPRANGES_H - -#include "llvm/ADT/IntervalMap.h" -#include "llvm/CodeGen/SlotIndexes.h" - -namespace llvm { - -class MachineLoop; -class MachineLoopInfo; -class raw_ostream; - -/// MachineLoopRange - Range information for a single loop. -class MachineLoopRange { - friend class MachineLoopRanges; - -public: - typedef IntervalMap Map; - typedef Map::Allocator Allocator; - -private: - /// The mapped loop. - const MachineLoop *const Loop; - - /// Map intervals to a bit mask. - /// Bit 0 = inside loop block. - Map Intervals; - - /// Loop area as measured by SlotIndex::distance. - unsigned Area; - - /// Create a MachineLoopRange, only accessible to MachineLoopRanges. - MachineLoopRange(const MachineLoop*, Allocator&, SlotIndexes&); - -public: - /// getLoop - Return the mapped machine loop. - const MachineLoop *getLoop() const { return Loop; } - - /// overlaps - Return true if this loop overlaps the given range of machine - /// inteructions. - bool overlaps(SlotIndex Start, SlotIndex Stop); - - /// getNumber - Return the loop number. This is the same as the number of the - /// header block. - unsigned getNumber() const; - - /// getArea - Return the loop area. 
This number is approximately proportional - /// to the number of instructions in the loop. - unsigned getArea() const { return Area; } - - /// getMap - Allow public read-only access for IntervalMapOverlaps. - const Map &getMap() { return Intervals; } - - /// print - Print loop ranges on OS. - void print(raw_ostream&) const; - - /// byNumber - Comparator for array_pod_sort that sorts a list of - /// MachineLoopRange pointers by number. - static int byNumber(const void*, const void*); - - /// byAreaDesc - Comparator for array_pod_sort that sorts a list of - /// MachineLoopRange pointers by descending area, then by number. - static int byAreaDesc(const void*, const void*); -}; - -raw_ostream &operator<<(raw_ostream&, const MachineLoopRange&); - -/// MachineLoopRanges - Analysis pass that provides on-demand per-loop range -/// information. -class MachineLoopRanges : public MachineFunctionPass { - typedef DenseMap CacheMap; - typedef MachineLoopRange::Allocator MapAllocator; - - MapAllocator Allocator; - SlotIndexes *Indexes; - CacheMap Cache; - -public: - static char ID; // Pass identification, replacement for typeid - - MachineLoopRanges() : MachineFunctionPass(ID), Indexes(0) {} - ~MachineLoopRanges() { releaseMemory(); } - - /// getLoopRange - Return the range of loop. 
- MachineLoopRange *getLoopRange(const MachineLoop *Loop); - -private: - virtual bool runOnMachineFunction(MachineFunction&); - virtual void releaseMemory(); - virtual void getAnalysisUsage(AnalysisUsage&) const; -}; - - -} // end namespace llvm - -#endif // LLVM_CODEGEN_MACHINELOOPRANGES_H diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 6b88d4a9499b..a3acec809547 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -31,19 +31,18 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H #define LLVM_CODEGEN_MACHINEMODULEINFO_H -#include "llvm/Pass.h" -#include "llvm/GlobalValue.h" -#include "llvm/Metadata.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Support/ValueHandle.h" -#include "llvm/Support/DataTypes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Pass.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ValueHandle.h" namespace llvm { @@ -180,8 +179,9 @@ public: const MCObjectFileInfo *MOFI); ~MachineModuleInfo(); - bool doInitialization(); - bool doFinalization(); + // Initialization and Finalization + virtual bool doInitialization(Module &); + virtual bool doFinalization(Module &); /// EndFunction - Discard function meta information. /// @@ -295,7 +295,7 @@ public: /// isUsedFunction - Return true if the functions in the llvm.used list. This /// does not return true for things in llvm.compiler.used unless they are also /// in llvm.used. 
- bool isUsedFunction(const Function *F) { + bool isUsedFunction(const Function *F) const { return UsedFunctions.count(F); } @@ -372,7 +372,7 @@ public: /// getCurrentCallSite - Get the call site currently being processed, if any. /// return zero if none. - unsigned getCurrentCallSite(void) { return CurCallSite; } + unsigned getCurrentCallSite() { return CurCallSite; } /// getTypeInfos - Return a reference to the C++ typeinfo for the current /// function. diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 606833cd4081..414770b9ecf0 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -35,6 +35,11 @@ class MCSymbol; /// MachineOperand class - Representation of each machine instruction operand. /// +/// This class isn't a POD type because it has a private constructor, but its +/// destructor must be trivial. Functions like MachineInstr::addOperand(), +/// MachineRegisterInfo::moveOperands(), and MF::DeleteMachineInstr() depend on +/// not having to call the MachineOperand destructor. +/// class MachineOperand { public: enum MachineOperandType { @@ -60,15 +65,11 @@ private: /// union. unsigned char OpKind; // MachineOperandType - // This union is discriminated by OpKind. - union { - /// SubReg - Subregister number, only valid for MO_Register. A value of 0 - /// indicates the MO_Register has no subReg. - unsigned char SubReg; - - /// TargetFlags - This is a set of target-specific operand flags. - unsigned char TargetFlags; - }; + /// Subregister number for MO_Register. A value of 0 indicates the + /// MO_Register has no subReg. + /// + /// For all other kinds of operands, this field holds target-specific flags. + unsigned SubReg_TargetFlags : 12; /// TiedTo - Non-zero when this register operand is tied to another register /// operand. 
The encoding of this field is described in the block comment @@ -176,24 +177,25 @@ private: } OffsetedInfo; } Contents; - explicit MachineOperand(MachineOperandType K) : OpKind(K), ParentMI(0) { - TargetFlags = 0; - } + explicit MachineOperand(MachineOperandType K) + : OpKind(K), SubReg_TargetFlags(0), ParentMI(0) {} public: /// getType - Returns the MachineOperandType for this operand. /// MachineOperandType getType() const { return (MachineOperandType)OpKind; } - unsigned char getTargetFlags() const { - return isReg() ? 0 : TargetFlags; + unsigned getTargetFlags() const { + return isReg() ? 0 : SubReg_TargetFlags; } - void setTargetFlags(unsigned char F) { + void setTargetFlags(unsigned F) { assert(!isReg() && "Register operands can't have target flags"); - TargetFlags = F; + SubReg_TargetFlags = F; + assert(SubReg_TargetFlags == F && "Target flags out of range"); } - void addTargetFlag(unsigned char F) { + void addTargetFlag(unsigned F) { assert(!isReg() && "Register operands can't have target flags"); - TargetFlags |= F; + SubReg_TargetFlags |= F; + assert((SubReg_TargetFlags & F) && "Target flags out of range"); } @@ -261,7 +263,7 @@ public: unsigned getSubReg() const { assert(isReg() && "Wrong MachineOperand accessor"); - return (unsigned)SubReg; + return SubReg_TargetFlags; } bool isUse() const { @@ -336,7 +338,8 @@ public: void setSubReg(unsigned subReg) { assert(isReg() && "Wrong MachineOperand accessor"); - SubReg = (unsigned char)subReg; + SubReg_TargetFlags = subReg; + assert(SubReg_TargetFlags == subReg && "SubReg out of range"); } /// substVirtReg - Substitute the current register with the virtual @@ -574,7 +577,7 @@ public: Op.SmallContents.RegNo = Reg; Op.Contents.Reg.Prev = 0; Op.Contents.Reg.Next = 0; - Op.SubReg = SubReg; + Op.setSubReg(SubReg); return Op; } static MachineOperand CreateMBB(MachineBasicBlock *MBB, diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h index 
a9fc8434abee..ca09aef82616 100644 --- a/include/llvm/CodeGen/MachinePostDominators.h +++ b/include/llvm/CodeGen/MachinePostDominators.h @@ -15,10 +15,9 @@ #ifndef LLVM_CODEGEN_MACHINEPOSTDOMINATORS_H #define LLVM_CODEGEN_MACHINEPOSTDOMINATORS_H -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -55,26 +54,27 @@ public: return DT->getNode(BB); } - bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const { + bool dominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { return DT->dominates(A, B); } - bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const { + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { return DT->dominates(A, B); } - bool - properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const { + bool properlyDominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { return DT->properlyDominates(A, B); } - bool - properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const { + bool properlyDominates(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { return DT->properlyDominates(A, B); } MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { + MachineBasicBlock *B) { return DT->findNearestCommonDominator(A, B); } diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 4e86363f071a..4b43cc10951a 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -14,10 +14,10 @@ #ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H #define LLVM_CODEGEN_MACHINEREGISTERINFO_H -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include 
"llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Target/TargetRegisterInfo.h" #include namespace llvm { @@ -99,13 +99,11 @@ class MachineRegisterInfo { /// started. BitVector ReservedRegs; - /// LiveIns/LiveOuts - Keep track of the physical registers that are - /// livein/liveout of the function. Live in values are typically arguments in - /// registers, live out values are typically return values in registers. - /// LiveIn values are allowed to have virtual registers associated with them, - /// stored in the second element. + /// Keep track of the physical registers that are live in to the function. + /// Live in values are typically arguments in registers. LiveIn values are + /// allowed to have virtual registers associated with them, stored in the + /// second element. std::vector > LiveIns; - std::vector LiveOuts; MachineRegisterInfo(const MachineRegisterInfo&) LLVM_DELETED_FUNCTION; void operator=(const MachineRegisterInfo&) LLVM_DELETED_FUNCTION; @@ -156,6 +154,9 @@ public: // Strictly for use by MachineInstr.cpp. void removeRegOperandFromUseList(MachineOperand *MO); + // Strictly for use by MachineInstr.cpp. + void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps); + /// reg_begin/reg_end - Provide iteration support to walk over all definitions /// and uses of a register within the MachineFunction that corresponds to this /// MachineRegisterInfo object. @@ -376,6 +377,12 @@ public: return false; } + /// Mark the specified register unit as used in this function. + /// This should only be called during and after register allocation. + void setRegUnitUsed(unsigned RegUnit) { + UsedRegUnits.set(RegUnit); + } + /// setPhysRegUsed - Mark the specified register used in this function. /// This should only be called during and after register allocation. 
void setPhysRegUsed(unsigned Reg) { @@ -457,30 +464,24 @@ public: } //===--------------------------------------------------------------------===// - // LiveIn/LiveOut Management + // LiveIn Management //===--------------------------------------------------------------------===// - /// addLiveIn/Out - Add the specified register as a live in/out. Note that it + /// addLiveIn - Add the specified register as a live-in. Note that it /// is an error to add the same register to the same set more than once. void addLiveIn(unsigned Reg, unsigned vreg = 0) { LiveIns.push_back(std::make_pair(Reg, vreg)); } - void addLiveOut(unsigned Reg) { LiveOuts.push_back(Reg); } - // Iteration support for live in/out sets. These sets are kept in sorted - // order by their register number. + // Iteration support for the live-ins set. It's kept in sorted order + // by register number. typedef std::vector >::const_iterator livein_iterator; - typedef std::vector::const_iterator liveout_iterator; livein_iterator livein_begin() const { return LiveIns.begin(); } livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } - liveout_iterator liveout_begin() const { return LiveOuts.begin(); } - liveout_iterator liveout_end() const { return LiveOuts.end(); } - bool liveout_empty() const { return LiveOuts.empty(); } bool isLiveIn(unsigned Reg) const; - bool isLiveOut(unsigned Reg) const; /// getLiveInPhysReg - If VReg is a live-in virtual register, return the /// corresponding live-in physical register. 
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 31bd606f9320..57febe77464c 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -24,8 +24,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MACHINESCHEDULER_H -#define MACHINESCHEDULER_H +#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H +#define LLVM_CODEGEN_MACHINESCHEDULER_H #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/RegisterPressure.h" @@ -43,6 +43,7 @@ class MachineDominatorTree; class MachineLoopInfo; class RegisterClassInfo; class ScheduleDAGInstrs; +class SchedDFSResult; /// MachineSchedContext provides enough context from the MachineScheduler pass /// for the target to instantiate a scheduler. @@ -119,6 +120,9 @@ public: /// be scheduled at the bottom. virtual SUnit *pickNode(bool &IsTopNode) = 0; + /// \brief Scheduler callback to notify that a new subtree is scheduled. + virtual void scheduleTree(unsigned SubtreeID) {} + /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an /// instruction and updated scheduled/remaining flags in the DAG nodes. virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; @@ -164,6 +168,8 @@ public: iterator end() { return Queue.end(); } + ArrayRef elements() { return Queue; } + iterator find(SUnit *SU) { return std::find(Queue.begin(), Queue.end(), SU); } @@ -181,7 +187,7 @@ public: return Queue.begin() + idx; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump(); #endif }; @@ -202,6 +208,15 @@ protected: RegisterClassInfo *RegClassInfo; MachineSchedStrategy *SchedImpl; + /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees + /// will be empty. + SchedDFSResult *DFSResult; + BitVector ScheduledTrees; + + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. 
+ ScheduleDAGTopologicalSort Topo; + /// Ordered list of DAG postprocessing steps. std::vector Mutations; @@ -226,6 +241,10 @@ protected: IntervalPressure BotPressure; RegPressureTracker BotRPTracker; + /// Record the next node in a scheduled cluster. + const SUnit *NextClusterPred; + const SUnit *NextClusterSucc; + #ifndef NDEBUG /// The number of instructions scheduled so far. Used to cut off the /// scheduler at the point determined by misched-cutoff. @@ -235,25 +254,33 @@ protected: public: ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), - AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), - RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), - CurrentBottom(), BotRPTracker(BotPressure) { + AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), DFSResult(0), + Topo(SUnits, &ExitSU), RPTracker(RegPressure), CurrentTop(), + TopRPTracker(TopPressure), CurrentBottom(), BotRPTracker(BotPressure), + NextClusterPred(NULL), NextClusterSucc(NULL) { #ifndef NDEBUG NumInstrsScheduled = 0; #endif } - virtual ~ScheduleDAGMI() { - delete SchedImpl; - } + virtual ~ScheduleDAGMI(); /// Add a postprocessing step to the DAG builder. /// Mutations are applied in the order that they are added after normal DAG /// building and before MachineSchedStrategy initialization. + /// + /// ScheduleDAGMI takes ownership of the Mutation object. void addMutation(ScheduleDAGMutation *Mutation) { Mutations.push_back(Mutation); } + /// \brief Add a DAG edge to the given SU with the given predecessor + /// dependence data. + /// + /// \returns true if the edge may be added without creating a cycle OR if an + /// equivalent edge already existed (false indicates failure). 
+ bool addEdge(SUnit *SuccSU, const SDep &PredDep); + MachineBasicBlock::iterator top() const { return CurrentTop; } MachineBasicBlock::iterator bottom() const { return CurrentBottom; } @@ -285,6 +312,22 @@ public: return RegionCriticalPSets; } + const SUnit *getNextClusterPred() const { return NextClusterPred; } + + const SUnit *getNextClusterSucc() const { return NextClusterSucc; } + + /// Compute a DFSResult after DAG building is complete, and before any + /// queue comparisons. + void computeDFSResult(); + + /// Return a non-null DFS result if the scheduling strategy initialized it. + const SchedDFSResult *getDFSResult() const { return DFSResult; } + + BitVector &getScheduledTrees() { return ScheduledTrees; } + + void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE; + void viewGraph() LLVM_OVERRIDE; + protected: // Top-Level entry points for the schedule() driver... @@ -298,8 +341,8 @@ protected: /// instances of ScheduleDAGMI to perform custom DAG postprocessing. void postprocessDAG(); - /// Identify DAG roots and setup scheduler queues. - void initQueues(); + /// Release ExitSU predecessors and setup scheduler queues. + void initQueues(ArrayRef TopRoots, ArrayRef BotRoots); /// Move an instruction and update register pressure. 
void scheduleMI(SUnit *SU, bool IsTopNode); @@ -317,12 +360,13 @@ protected: void initRegPressure(); - void updateScheduledPressure(std::vector NewMaxPressure); + void updateScheduledPressure(const std::vector &NewMaxPressure); void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); bool checkSchedLimit(); - void releaseRoots(); + void findRootsAndBiasEdges(SmallVectorImpl &TopRoots, + SmallVectorImpl &BotRoots); void releaseSucc(SUnit *SU, SDep *SuccEdge); void releaseSuccessors(SUnit *SU); diff --git a/lib/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h similarity index 85% rename from lib/CodeGen/MachineTraceMetrics.h rename to include/llvm/CodeGen/MachineTraceMetrics.h index 460730b04059..2775a0485821 100644 --- a/lib/CodeGen/MachineTraceMetrics.h +++ b/include/llvm/CodeGen/MachineTraceMetrics.h @@ -107,6 +107,13 @@ public: /// Get the fixed resource information about MBB. Compute it on demand. const FixedBlockInfo *getResources(const MachineBasicBlock*); + /// Get the scaled number of cycles used per processor resource in MBB. + /// This is an array with SchedModel.getNumProcResourceKinds() entries. + /// The getResources() function above must have been called first. + /// + /// These numbers have already been scaled by SchedModel.getResourceFactor(). + ArrayRef getProcResourceCycles(unsigned MBBNum) const; + /// A virtual register or regunit required by a basic block or its trace /// successors. struct LiveInReg { @@ -165,12 +172,25 @@ public: /// Invalidate height resources when a block below this one has changed. void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; } - /// Determine if this block belongs to the same trace as TBI and comes - /// before it in the trace. + /// Assuming that this is a dominator of TBI, determine if it contains + /// useful instruction depths. 
A dominating block can be above the current + /// trace head, and any dependencies from such a far away dominator are not + /// expected to affect the critical path. + /// /// Also returns true when TBI == this. - bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const { - return hasValidDepth() && TBI.hasValidDepth() && - Head == TBI.Head && InstrDepth <= TBI.InstrDepth; + bool isUsefulDominator(const TraceBlockInfo &TBI) const { + // The trace for TBI may not even be calculated yet. + if (!hasValidDepth() || !TBI.hasValidDepth()) + return false; + // Instruction depths are only comparable if the traces share a head. + if (Head != TBI.Head) + return false; + // It is almost always the case that TBI belongs to the same trace as + // this block, but rare convoluted cases involving irreducible control + // flow, a dominator may share a trace head without actually being on the + // same trace as TBI. This is not a big problem as long as it doesn't + // increase the instruction depth. + return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth; } // Data-dependency-related information. Per-instruction depth and height @@ -271,6 +291,8 @@ public: class Ensemble { SmallVector BlockInfo; DenseMap Cycles; + SmallVector ProcResourceDepths; + SmallVector ProcResourceHeights; friend class Trace; void computeTrace(const MachineBasicBlock*); @@ -290,6 +312,8 @@ public: const MachineLoop *getLoopFor(const MachineBasicBlock*) const; const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; + ArrayRef getProcResourceDepths(unsigned MBBNum) const; + ArrayRef getProcResourceHeights(unsigned MBBNum) const; public: virtual ~Ensemble(); @@ -330,8 +354,22 @@ private: // One entry per basic block, indexed by block number. SmallVector BlockInfo; + // Cycles consumed on each processor resource per block. 
+ // The number of processor resource kinds is constant for a given subtarget, + // but it is not known at compile time. The number of cycles consumed by + // block B on processor resource R is at ProcResourceCycles[B*Kinds + R] + // where Kinds = SchedModel.getNumProcResourceKinds(). + SmallVector ProcResourceCycles; + // One ensemble per strategy. Ensemble* Ensembles[TS_NumStrategies]; + + // Convert scaled resource usage to a cycle count that can be compared with + // latencies. + unsigned getCycles(unsigned Scaled) { + unsigned Factor = SchedModel.getLatencyFactor(); + return (Scaled + Factor - 1) / Factor; + } }; inline raw_ostream &operator<<(raw_ostream &OS, diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h index 83c379b48cba..85bf511d6022 100644 --- a/include/llvm/CodeGen/PBQP/Graph.h +++ b/include/llvm/CodeGen/PBQP/Graph.h @@ -16,10 +16,10 @@ #define LLVM_CODEGEN_PBQP_GRAPH_H #include "Math.h" - +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include #include -#include namespace PBQP { diff --git a/include/llvm/CodeGen/PBQP/HeuristicSolver.h b/include/llvm/CodeGen/PBQP/HeuristicSolver.h index 35514f967478..47e15b27e744 100644 --- a/include/llvm/CodeGen/PBQP/HeuristicSolver.h +++ b/include/llvm/CodeGen/PBQP/HeuristicSolver.h @@ -18,8 +18,8 @@ #include "Graph.h" #include "Solution.h" -#include #include +#include namespace PBQP { diff --git a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h index a859e5899f06..307d81e1d161 100644 --- a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h +++ b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h @@ -18,9 +18,8 @@ #ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H #define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H -#include "../HeuristicSolver.h" #include "../HeuristicBase.h" - +#include "../HeuristicSolver.h" #include namespace PBQP { diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h index 
e7598bf3e3f1..08f8b981ae27 100644 --- a/include/llvm/CodeGen/PBQP/Math.h +++ b/include/llvm/CodeGen/PBQP/Math.h @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_PBQP_MATH_H +#ifndef LLVM_CODEGEN_PBQP_MATH_H #define LLVM_CODEGEN_PBQP_MATH_H -#include #include +#include #include namespace PBQP { diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h index 57d9b95fc3b1..b9f288bbeeb4 100644 --- a/include/llvm/CodeGen/PBQP/Solution.h +++ b/include/llvm/CodeGen/PBQP/Solution.h @@ -14,9 +14,8 @@ #ifndef LLVM_CODEGEN_PBQP_SOLUTION_H #define LLVM_CODEGEN_PBQP_SOLUTION_H -#include "Math.h" #include "Graph.h" - +#include "Math.h" #include namespace PBQP { diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 7bd576494ef7..fc8aa75ddfeb 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -25,6 +25,7 @@ namespace llvm { class MachineFunctionPass; class PassInfo; class PassManagerBase; + class TargetLoweringBase; class TargetLowering; class TargetRegisterClass; class raw_ostream; @@ -141,6 +142,10 @@ public: /// Add passes to lower exception handling for the code generator. void addPassesToHandleExceptions(); + /// Add pass to prepare the LLVM IR for code generation. This should be done + /// before exception handling preparation passes. + virtual void addCodeGenPrepare(); + /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. virtual void addISelPrepare(); @@ -176,6 +181,16 @@ protected: /// instructions in SSA form. virtual void addMachineSSAOptimization(); + /// Add passes that optimize instruction level parallelism for out-of-order + /// targets. These passes are run while the machine code is still in SSA + /// form, so they can use MachineTraceMetrics to control their heuristics. 
+ /// + /// All passes added here should preserve the MachineDominatorTree, + /// MachineLoopInfo, and MachineTraceMetrics analyses. + virtual bool addILPOpts() { + return false; + } + /// addPreRegAlloc - This method may be implemented by targets that want to /// run passes immediately before register allocation. This should return /// true if -print-machineinstrs should print after these passes. @@ -237,6 +252,11 @@ protected: return false; } + /// addGCPasses - Add late codegen passes that analyze code for garbage + /// collection. This should return true if GC info should be printed after + /// these passes. + virtual bool addGCPasses(); + /// Add standard basic block placement passes. virtual void addBlockPlacement(); @@ -271,6 +291,13 @@ protected: /// List of target independent CodeGen pass IDs. namespace llvm { + /// \brief Create a basic TargetTransformInfo analysis pass. + /// + /// This pass implements the target transform info analysis using the target + /// independent information available to the LLVM code generator. + ImmutablePass * + createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI); + /// createUnreachableBlockEliminationPass - The LLVM code generator does not /// work well with unreachable basic blocks (what live ranges make sense for a /// block that cannot be reached?). As such, a code generator should either @@ -288,9 +315,6 @@ namespace llvm { /// MachineLoopInfo - This pass is a loop analysis pass. extern char &MachineLoopInfoID; - /// MachineLoopRanges - This pass is an on-demand loop coverage analysis. - extern char &MachineLoopRangesID; - /// MachineDominators - This pass is a machine dominators analysis pass. extern char &MachineDominatorsID; @@ -420,10 +444,6 @@ namespace llvm { /// information. extern char &MachineBlockPlacementStatsID; - /// Code Placement - This pass optimize code placement and aligns loop - /// headers to target specific alignment boundary. 
- extern char &CodePlacementOptID; - /// GCLowering Pass - Performs target-independent LLVM IR transformations for /// highly portable strategies. /// @@ -435,10 +455,6 @@ namespace llvm { /// branch folding). extern char &GCMachineCodeAnalysisID; - /// Deleter Pass - Releases GC metadata. - /// - FunctionPass *createGCInfoDeleter(); - /// Creates a pass to print GC metadata. /// FunctionPass *createGCInfoPrinter(raw_ostream &OS); @@ -469,7 +485,7 @@ namespace llvm { /// createStackProtectorPass - This pass adds stack protectors to functions. /// - FunctionPass *createStackProtectorPass(const TargetLowering *tli); + FunctionPass *createStackProtectorPass(const TargetLoweringBase *tli); /// createMachineVerifierPass - This pass verifies cenerated machine code /// instructions for correctness. @@ -483,7 +499,7 @@ namespace llvm { /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. /// - FunctionPass *createSjLjEHPreparePass(const TargetLowering *tli); + FunctionPass *createSjLjEHPreparePass(const TargetLoweringBase *tli); /// LocalStackSlotAllocation - This pass assigns local frame indices to stack /// slots relative to one another and allocates base registers to access them diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h index 8f52d3bf47d2..df74d08888bb 100644 --- a/include/llvm/CodeGen/PseudoSourceValue.h +++ b/include/llvm/CodeGen/PseudoSourceValue.h @@ -14,7 +14,7 @@ #ifndef LLVM_CODEGEN_PSEUDOSOURCEVALUE_H #define LLVM_CODEGEN_PSEUDOSOURCEVALUE_H -#include "llvm/Value.h" +#include "llvm/IR/Value.h" namespace llvm { class MachineFrameInfo; diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index acfc07dd31a2..b617c145585c 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" 
#include "llvm/CodeGen/PBQP/Graph.h" #include "llvm/CodeGen/PBQP/Solution.h" - #include #include @@ -29,6 +28,7 @@ namespace llvm { class LiveIntervals; class MachineFunction; class MachineLoopInfo; + class TargetRegisterInfo; /// This class wraps up a PBQP instance representing a register allocation /// problem, plus the structures necessary to map back from the PBQP solution diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h index 100e357654fb..ca495778446f 100644 --- a/include/llvm/CodeGen/RegAllocRegistry.h +++ b/include/llvm/CodeGen/RegAllocRegistry.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGENREGALLOCREGISTRY_H -#define LLVM_CODEGENREGALLOCREGISTRY_H +#ifndef LLVM_CODEGEN_REGALLOCREGISTRY_H +#define LLVM_CODEGEN_REGALLOCREGISTRY_H #include "llvm/CodeGen/MachinePassRegistry.h" diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h index 4467b62f2370..3ad22e65c8c7 100644 --- a/include/llvm/CodeGen/RegisterClassInfo.h +++ b/include/llvm/CodeGen/RegisterClassInfo.h @@ -29,10 +29,15 @@ class RegisterClassInfo { unsigned Tag; unsigned NumRegs; bool ProperSubClass; - OwningArrayPtr Order; + uint8_t MinCost; + uint16_t LastCostChange; + OwningArrayPtr Order; - RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {} - operator ArrayRef() const { + RCInfo() + : Tag(0), NumRegs(0), ProperSubClass(false), MinCost(0), + LastCostChange(0) {} + + operator ArrayRef() const { return makeArrayRef(Order.get(), NumRegs); } }; @@ -84,7 +89,7 @@ public: /// getOrder - Returns the preferred allocation order for RC. The order /// contains no reserved registers, and registers that alias callee saved /// registers come last. 
- ArrayRef getOrder(const TargetRegisterClass *RC) const { + ArrayRef getOrder(const TargetRegisterClass *RC) const { return get(RC); } @@ -106,6 +111,21 @@ public: return CalleeSaved[N-1]; return 0; } + + /// Get the minimum register cost in RC's allocation order. + /// This is the smallest value returned by TRI->getCostPerUse(Reg) for all + /// the registers in getOrder(RC). + unsigned getMinCost(const TargetRegisterClass *RC) { + return get(RC).MinCost; + } + + /// Get the position of the last cost change in getOrder(RC). + /// + /// All registers in getOrder(RC).slice(getLastCostChange(RC)) will have the + /// same cost according to TRI->getCostPerUse(). + unsigned getLastCostChange(const TargetRegisterClass *RC) { + return get(RC).LastCostChange; + } }; } // end namespace llvm diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 30326d05df04..267018074677 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -15,13 +15,14 @@ #ifndef LLVM_CODEGEN_REGISTERPRESSURE_H #define LLVM_CODEGEN_REGISTERPRESSURE_H +#include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SparseSet.h" namespace llvm { class LiveIntervals; +class LiveInterval; class RegisterClassInfo; class MachineInstr; @@ -30,18 +31,24 @@ struct RegisterPressure { /// Map of max reg pressure indexed by pressure set ID, not class ID. std::vector MaxSetPressure; - /// List of live in registers. + /// List of live in virtual registers or physical register units. SmallVector LiveInRegs; SmallVector LiveOutRegs; /// Increase register pressure for each pressure set impacted by this register /// class. Normally called by RegPressureTracker, but may be called manually /// to account for live through (global liveness). 
- void increase(const TargetRegisterClass *RC, const TargetRegisterInfo *TRI); + /// + /// \param Reg is either a virtual register number or register unit number. + void increase(unsigned Reg, const TargetRegisterInfo *TRI, + const MachineRegisterInfo *MRI); /// Decrease register pressure for each pressure set impacted by this register /// class. This is only useful to account for spilling or rematerialization. - void decrease(const TargetRegisterClass *RC, const TargetRegisterInfo *TRI); + /// + /// \param Reg is either a virtual register number or register unit number. + void decrease(unsigned Reg, const TargetRegisterInfo *TRI, + const MachineRegisterInfo *MRI); void dump(const TargetRegisterInfo *TRI) const; }; @@ -116,6 +123,33 @@ struct RegPressureDelta { RegPressureDelta() {} }; +/// \brief A set of live virtual registers and physical register units. +/// +/// Virtual and physical register numbers require separate sparse sets, but most +/// of the RegisterPressureTracker handles them uniformly. +struct LiveRegSet { + SparseSet PhysRegs; + SparseSet VirtRegs; + + bool contains(unsigned Reg) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return VirtRegs.count(Reg); + return PhysRegs.count(Reg); + } + + bool insert(unsigned Reg) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return VirtRegs.insert(Reg).second; + return PhysRegs.insert(Reg).second; + } + + bool erase(unsigned Reg) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return VirtRegs.erase(Reg); + return PhysRegs.erase(Reg); + } +}; + /// Track the current register pressure at some position in the instruction /// stream, and remember the high water mark within the region traversed. This /// does not automatically consider live-through ranges. The client may @@ -150,15 +184,15 @@ class RegPressureTracker { bool RequireIntervals; /// Register pressure corresponds to liveness before this instruction - /// iterator. It may point to the end of the block rather than an instruction. 
+ /// iterator. It may point to the end of the block or a DebugValue rather than + /// an instruction. MachineBasicBlock::const_iterator CurrPos; /// Pressure map indexed by pressure set ID, not class ID. std::vector CurrSetPressure; - /// List of live registers. - SparseSet LivePhysRegs; - SparseSet LiveVirtRegs; + /// Set of live registers. + LiveRegSet LiveRegs; public: RegPressureTracker(IntervalPressure &rp) : @@ -171,8 +205,9 @@ public: const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos); - /// Force liveness of registers. Particularly useful to initialize the - /// livein/out state of the tracker before the first call to advance/recede. + /// Force liveness of virtual registers or physical register + /// units. Particularly useful to initialize the livein/out state of the + /// tracker before the first call to advance/recede. void addLiveRegs(ArrayRef Regs); /// Get the MI position corresponding to this register pressure. @@ -184,6 +219,10 @@ public: // position changes while pressure does not. void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; } + /// \brief Get the SlotIndex for the first nondebug instruction including or + /// after the current position. + SlotIndex getCurrSlot() const; + /// Recede across the previous instruction. bool recede(); @@ -203,11 +242,8 @@ public: /// than the pressure across the traversed region. 
std::vector &getRegSetPressureAtPos() { return CurrSetPressure; } - void discoverPhysLiveIn(unsigned Reg); - void discoverPhysLiveOut(unsigned Reg); - - void discoverVirtLiveIn(unsigned Reg); - void discoverVirtLiveOut(unsigned Reg); + void discoverLiveOut(unsigned Reg); + void discoverLiveIn(unsigned Reg); bool isTopClosed() const; bool isBottomClosed() const; @@ -268,12 +304,13 @@ public: return getDownwardPressure(MI, PressureResult, MaxPressureResult); } -protected: - void increasePhysRegPressure(ArrayRef Regs); - void decreasePhysRegPressure(ArrayRef Regs); + void dump() const; - void increaseVirtRegPressure(ArrayRef Regs); - void decreaseVirtRegPressure(ArrayRef Regs); +protected: + const LiveInterval *getInterval(unsigned Reg) const; + + void increaseRegPressure(ArrayRef Regs); + void decreaseRegPressure(ArrayRef Regs); void bumpUpwardPressure(const MachineInstr *MI); void bumpDownwardPressure(const MachineInstr *MI); diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 08d316992ec5..95bf29167c20 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_REGISTER_SCAVENGING_H -#define LLVM_CODEGEN_REGISTER_SCAVENGING_H +#ifndef LLVM_CODEGEN_REGISTERSCAVENGING_H +#define LLVM_CODEGEN_REGISTERSCAVENGING_H +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/ADT/BitVector.h" namespace llvm { @@ -40,21 +40,23 @@ class RegScavenger { /// registers. bool Tracking; - /// ScavengingFrameIndex - Special spill slot used for scavenging a register - /// post register allocation. - int ScavengingFrameIndex; + /// Information on scavenged registers (held in a spill slot). 
+ struct ScavengedInfo { + ScavengedInfo(int FI = -1) : FrameIndex(FI), Reg(0), Restore(NULL) {} - /// ScavengedReg - If none zero, the specific register is currently being - /// scavenged. That is, it is spilled to the special scavenging stack slot. - unsigned ScavengedReg; + /// A spill slot used for scavenging a register post register allocation. + int FrameIndex; - /// ScavengedRC - Register class of the scavenged register. - /// - const TargetRegisterClass *ScavengedRC; + /// If non-zero, the specific register is currently being + /// scavenged. That is, it is spilled to this scavenging stack slot. + unsigned Reg; - /// ScavengeRestore - Instruction that restores the scavenged register from - /// stack. - const MachineInstr *ScavengeRestore; + /// The instruction that restores the scavenged register from stack. + const MachineInstr *Restore; + }; + + /// A vector of information on scavenged registers. + SmallVector Scavenged; /// CalleeSavedrRegs - A bitvector of callee saved registers for the target. /// @@ -71,8 +73,7 @@ class RegScavenger { public: RegScavenger() - : MBB(NULL), NumPhysRegs(0), Tracking(false), - ScavengingFrameIndex(-1), ScavengedReg(0), ScavengedRC(NULL) {} + : MBB(NULL), NumPhysRegs(0), Tracking(false) {} /// enterBasicBlock - Start tracking liveness from the begin of the specific /// basic block. @@ -92,9 +93,25 @@ public: while (MBBI != I) forward(); } + /// Invert the behavior of forward() on the current instruction (undo the + /// changes to the available registers made by forward()). + void unprocess(); + + /// Unprocess instructions until you reach the provided iterator. + void unprocess(MachineBasicBlock::iterator I) { + while (MBBI != I) unprocess(); + } + /// skipTo - Move the internal MBB iterator but do not update register states. 
- /// - void skipTo(MachineBasicBlock::iterator I) { MBBI = I; } + void skipTo(MachineBasicBlock::iterator I) { + if (I == MachineBasicBlock::iterator(NULL)) + Tracking = false; + MBBI = I; + } + + MachineBasicBlock::iterator getCurrentPosition() const { + return MBBI; + } /// getRegsUsed - return all registers currently in use in used. void getRegsUsed(BitVector &used, bool includeReserved); @@ -107,10 +124,28 @@ public: /// Return 0 if none is found. unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const; - /// setScavengingFrameIndex / getScavengingFrameIndex - accessor and setter of - /// ScavengingFrameIndex. - void setScavengingFrameIndex(int FI) { ScavengingFrameIndex = FI; } - int getScavengingFrameIndex() const { return ScavengingFrameIndex; } + /// Add a scavenging frame index. + void addScavengingFrameIndex(int FI) { + Scavenged.push_back(ScavengedInfo(FI)); + } + + /// Query whether a frame index is a scavenging frame index. + bool isScavengingFrameIndex(int FI) const { + for (SmallVector::const_iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) + if (I->FrameIndex == FI) + return true; + + return false; + } + + /// Get an array of scavenging frame indices. + void getScavengingFrameIndices(SmallVectorImpl &A) const { + for (SmallVector::const_iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) + if (I->FrameIndex >= 0) + A.push_back(I->FrameIndex); + } /// scavengeRegister - Make a register of the specific register class /// available and do the appropriate bookkeeping. SPAdj is the stack @@ -129,10 +164,12 @@ private: /// isReserved - Returns true if a register is reserved. It is never "unused". bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); } - /// isUsed / isUnused - Test if a register is currently being used. + /// isUsed - Test if a register is currently being used. 
When called by the + /// isAliasUsed function, we only check isReserved if this is the original + /// register, not an alias register. /// - bool isUsed(unsigned Reg) const { - return !RegsAvailable.test(Reg) || isReserved(Reg); + bool isUsed(unsigned Reg, bool CheckReserved = true) const { + return !RegsAvailable.test(Reg) || (CheckReserved && isReserved(Reg)); } /// isAliasUsed - Is Reg or an alias currently in use? @@ -147,6 +184,10 @@ private: RegsAvailable |= Regs; } + /// Processes the current instruction and fill the KillRegs and DefRegs bit + /// vectors. + void determineKillsAndDefs(); + /// Add Reg and all its sub-registers to BV. void addRegWithSubRegs(BitVector &BV, unsigned Reg); diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h index 56b5855c01c9..f20a9fce2ae8 100644 --- a/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// -#ifndef RESOURCE_PRIORITY_QUEUE_H -#define RESOURCE_PRIORITY_QUEUE_H +#ifndef LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H +#define LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H #include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 4bfd4ab530d1..41289a42c438 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -86,100 +86,131 @@ namespace RTLIB { ADD_F32, ADD_F64, ADD_F80, + ADD_F128, ADD_PPCF128, SUB_F32, SUB_F64, SUB_F80, + SUB_F128, SUB_PPCF128, MUL_F32, MUL_F64, MUL_F80, + MUL_F128, MUL_PPCF128, DIV_F32, DIV_F64, DIV_F80, + DIV_F128, DIV_PPCF128, REM_F32, REM_F64, 
REM_F80, + REM_F128, REM_PPCF128, FMA_F32, FMA_F64, FMA_F80, + FMA_F128, FMA_PPCF128, POWI_F32, POWI_F64, POWI_F80, + POWI_F128, POWI_PPCF128, SQRT_F32, SQRT_F64, SQRT_F80, + SQRT_F128, SQRT_PPCF128, LOG_F32, LOG_F64, LOG_F80, + LOG_F128, LOG_PPCF128, LOG2_F32, LOG2_F64, LOG2_F80, + LOG2_F128, LOG2_PPCF128, LOG10_F32, LOG10_F64, LOG10_F80, + LOG10_F128, LOG10_PPCF128, EXP_F32, EXP_F64, EXP_F80, + EXP_F128, EXP_PPCF128, EXP2_F32, EXP2_F64, EXP2_F80, + EXP2_F128, EXP2_PPCF128, SIN_F32, SIN_F64, SIN_F80, + SIN_F128, SIN_PPCF128, COS_F32, COS_F64, COS_F80, + COS_F128, COS_PPCF128, + SINCOS_F32, + SINCOS_F64, + SINCOS_F80, + SINCOS_F128, + SINCOS_PPCF128, POW_F32, POW_F64, POW_F80, + POW_F128, POW_PPCF128, CEIL_F32, CEIL_F64, CEIL_F80, + CEIL_F128, CEIL_PPCF128, TRUNC_F32, TRUNC_F64, TRUNC_F80, + TRUNC_F128, TRUNC_PPCF128, RINT_F32, RINT_F64, RINT_F80, + RINT_F128, RINT_PPCF128, NEARBYINT_F32, NEARBYINT_F64, NEARBYINT_F80, + NEARBYINT_F128, NEARBYINT_PPCF128, FLOOR_F32, FLOOR_F64, FLOOR_F80, + FLOOR_F128, FLOOR_PPCF128, COPYSIGN_F32, COPYSIGN_F64, COPYSIGN_F80, + COPYSIGN_F128, COPYSIGN_PPCF128, // CONVERSION + FPEXT_F64_F128, + FPEXT_F32_F128, FPEXT_F32_F64, FPEXT_F16_F32, FPROUND_F32_F16, FPROUND_F64_F32, FPROUND_F80_F32, + FPROUND_F128_F32, FPROUND_PPCF128_F32, FPROUND_F80_F64, + FPROUND_F128_F64, FPROUND_PPCF128_F64, FPTOSINT_F32_I8, FPTOSINT_F32_I16, @@ -194,6 +225,9 @@ namespace RTLIB { FPTOSINT_F80_I32, FPTOSINT_F80_I64, FPTOSINT_F80_I128, + FPTOSINT_F128_I32, + FPTOSINT_F128_I64, + FPTOSINT_F128_I128, FPTOSINT_PPCF128_I32, FPTOSINT_PPCF128_I64, FPTOSINT_PPCF128_I128, @@ -210,51 +244,68 @@ namespace RTLIB { FPTOUINT_F80_I32, FPTOUINT_F80_I64, FPTOUINT_F80_I128, + FPTOUINT_F128_I32, + FPTOUINT_F128_I64, + FPTOUINT_F128_I128, FPTOUINT_PPCF128_I32, FPTOUINT_PPCF128_I64, FPTOUINT_PPCF128_I128, SINTTOFP_I32_F32, SINTTOFP_I32_F64, SINTTOFP_I32_F80, + SINTTOFP_I32_F128, SINTTOFP_I32_PPCF128, SINTTOFP_I64_F32, SINTTOFP_I64_F64, SINTTOFP_I64_F80, + SINTTOFP_I64_F128, 
SINTTOFP_I64_PPCF128, SINTTOFP_I128_F32, SINTTOFP_I128_F64, SINTTOFP_I128_F80, + SINTTOFP_I128_F128, SINTTOFP_I128_PPCF128, UINTTOFP_I32_F32, UINTTOFP_I32_F64, UINTTOFP_I32_F80, + UINTTOFP_I32_F128, UINTTOFP_I32_PPCF128, UINTTOFP_I64_F32, UINTTOFP_I64_F64, UINTTOFP_I64_F80, + UINTTOFP_I64_F128, UINTTOFP_I64_PPCF128, UINTTOFP_I128_F32, UINTTOFP_I128_F64, UINTTOFP_I128_F80, + UINTTOFP_I128_F128, UINTTOFP_I128_PPCF128, // COMPARISON OEQ_F32, OEQ_F64, + OEQ_F128, UNE_F32, UNE_F64, + UNE_F128, OGE_F32, OGE_F64, + OGE_F128, OLT_F32, OLT_F64, + OLT_F128, OLE_F32, OLE_F64, + OLE_F128, OGT_F32, OGT_F64, + OGT_F128, UO_F32, UO_F64, + UO_F128, O_F32, O_F64, + O_F128, // MEMORY MEMCPY, diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 7e0ca1478e5f..8c959da696d8 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -16,13 +16,12 @@ #ifndef LLVM_CODEGEN_SCHEDULEDAG_H #define LLVM_CODEGEN_SCHEDULEDAG_H -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { class AliasAnalysis; @@ -53,11 +52,22 @@ namespace llvm { Order ///< Any other ordering dependency. }; + // Strong dependencies must be respected by the scheduler. Artificial + // dependencies may be removed only if they are redundant with another + // strong depedence. + // + // Weak dependencies may be violated by the scheduling strategy, but only if + // the strategy can prove it is correct to do so. + // + // Strong OrderKinds must occur before "Weak". + // Weak OrderKinds must occur after "Weak". enum OrderKind { Barrier, ///< An unknown scheduling barrier. 
MayAliasMem, ///< Nonvolatile load/Store instructions that may alias. MustAliasMem, ///< Nonvolatile load/Store instructions that must alias. - Artificial ///< Arbitrary weak DAG edge (no actual dependence). + Artificial, ///< Arbitrary strong DAG edge (no real dependence). + Weak, ///< Arbitrary weak DAG edge. + Cluster ///< Weak DAG edge linking a chain of clustered instrs. }; private: @@ -200,12 +210,26 @@ namespace llvm { return getKind() == Order && Contents.OrdKind == MustAliasMem; } + /// isWeak - Test if this a weak dependence. Weak dependencies are + /// considered DAG edges for height computation and other heuristics, but do + /// not force ordering. Breaking a weak edge may require the scheduler to + /// compensate, for example by inserting a copy. + bool isWeak() const { + return getKind() == Order && Contents.OrdKind >= Weak; + } + /// isArtificial - Test if this is an Order dependence that is marked /// as "artificial", meaning it isn't necessary for correctness. bool isArtificial() const { return getKind() == Order && Contents.OrdKind == Artificial; } + /// isCluster - Test if this is an Order dependence that is marked + /// as "cluster", meaning it is artificial and wants to be adjacent. + bool isCluster() const { + return getKind() == Order && Contents.OrdKind == Cluster; + } + /// isAssignedRegDep - Test if this is a Data dependence that is /// associated with a register. bool isAssignedRegDep() const { @@ -243,6 +267,8 @@ namespace llvm { /// SUnit - Scheduling unit. This is a node in the scheduling DAG. class SUnit { private: + enum { BoundaryID = ~0u }; + SDNode *Node; // Representative node. MachineInstr *Instr; // Alternatively, a MachineInstr. public: @@ -267,6 +293,8 @@ namespace llvm { unsigned NumSuccs; // # of SDep::Data sucss. unsigned NumPredsLeft; // # of preds not scheduled. unsigned NumSuccsLeft; // # of succs not scheduled. + unsigned WeakPredsLeft; // # of weak preds not scheduled. 
+ unsigned WeakSuccsLeft; // # of weak succs not scheduled. unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use. unsigned short Latency; // Node latency. bool isVRegCycle : 1; // May use and def the same vreg. @@ -301,12 +329,12 @@ namespace llvm { SUnit(SDNode *node, unsigned nodenum) : Node(node), Instr(0), OrigNode(0), SchedClass(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), - isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), - isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isScheduleLow(false), isCloned(false), - SchedulingPref(Sched::None), + NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0), + Latency(0), isVRegCycle(false), isCall(false), isCallOp(false), + isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), + hasPhysRegClobbers(false), isPending(false), isAvailable(false), + isScheduled(false), isScheduleHigh(false), isScheduleLow(false), + isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} @@ -315,28 +343,37 @@ namespace llvm { SUnit(MachineInstr *instr, unsigned nodenum) : Node(0), Instr(instr), OrigNode(0), SchedClass(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), - isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), - isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isScheduleLow(false), isCloned(false), - SchedulingPref(Sched::None), + NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0), + Latency(0), isVRegCycle(false), 
isCall(false), isCallOp(false), + isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), + hasPhysRegClobbers(false), isPending(false), isAvailable(false), + isScheduled(false), isScheduleHigh(false), isScheduleLow(false), + isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} /// SUnit - Construct a placeholder SUnit. SUnit() - : Node(0), Instr(0), OrigNode(0), SchedClass(0), NodeNum(~0u), + : Node(0), Instr(0), OrigNode(0), SchedClass(0), NodeNum(BoundaryID), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), - isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), - isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isScheduleLow(false), isCloned(false), - SchedulingPref(Sched::None), + NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0), + Latency(0), isVRegCycle(false), isCall(false), isCallOp(false), + isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), + hasPhysRegClobbers(false), isPending(false), isAvailable(false), + isScheduled(false), isScheduleHigh(false), isScheduleLow(false), + isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {} + /// \brief Boundary nodes are placeholders for the boundary of the + /// scheduling region. + /// + /// BoundaryNodes can have DAG edges, including Data edges, but they do not + /// correspond to schedulable entities (e.g. instructions) and do not have a + /// valid ID. Consequently, always check for boundary nodes before accessing + /// an assoicative data structure keyed on node ID. 
+ bool isBoundaryNode() const { return NodeNum == BoundaryID; }; + /// setNode - Assign the representative SDNode for this SUnit. /// This may be used during pre-regalloc scheduling. void setNode(SDNode *N) { @@ -372,7 +409,7 @@ namespace llvm { /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. - bool addPred(const SDep &D); + bool addPred(const SDep &D, bool Required = true); /// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of @@ -438,6 +475,10 @@ namespace llvm { return NumSuccsLeft == 0; } + /// \brief Order this node's predecessor edges such that the critical path + /// edge occurs first. + void biasCriticalPath(); + void dump(const ScheduleDAG *G) const; void dumpAll(const ScheduleDAG *G) const; void print(raw_ostream &O, const ScheduleDAG *G) const; @@ -546,8 +587,8 @@ namespace llvm { /// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered /// using 'dot'. /// - void viewGraph(const Twine &Name, const Twine &Title); - void viewGraph(); + virtual void viewGraph(const Twine &Name, const Twine &Title); + virtual void viewGraph(); virtual void dumpNode(const SUnit *SU) const = 0; @@ -654,6 +695,7 @@ namespace llvm { class ScheduleDAGTopologicalSort { /// SUnits - A reference to the ScheduleDAG's SUnits. std::vector &SUnits; + SUnit *ExitSU; /// Index2Node - Maps topological index to the node number. std::vector Index2Node; @@ -675,7 +717,7 @@ namespace llvm { void Allocate(int n, int index); public: - explicit ScheduleDAGTopologicalSort(std::vector &SUnits); + ScheduleDAGTopologicalSort(std::vector &SUnits, SUnit *ExitSU); /// InitDAGTopologicalSorting - create the initial topological /// ordering from the DAG to be scheduled. 
diff --git a/include/llvm/CodeGen/ScheduleDAGILP.h b/include/llvm/CodeGen/ScheduleDAGILP.h deleted file mode 100644 index 1aa405842173..000000000000 --- a/include/llvm/CodeGen/ScheduleDAGILP.h +++ /dev/null @@ -1,86 +0,0 @@ -//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definition of an ILP metric for machine level instruction scheduling. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H -#define LLVM_CODEGEN_SCHEDULEDAGILP_H - -#include "llvm/Support/DataTypes.h" -#include - -namespace llvm { - -class raw_ostream; -class ScheduleDAGInstrs; -class SUnit; - -/// \brief Represent the ILP of the subDAG rooted at a DAG node. -struct ILPValue { - unsigned InstrCount; - unsigned Cycles; - - ILPValue(): InstrCount(0), Cycles(0) {} - - ILPValue(unsigned count, unsigned cycles): - InstrCount(count), Cycles(cycles) {} - - bool isValid() const { return Cycles > 0; } - - // Order by the ILP metric's value. - bool operator<(ILPValue RHS) const { - return (uint64_t)InstrCount * RHS.Cycles - < (uint64_t)Cycles * RHS.InstrCount; - } - bool operator>(ILPValue RHS) const { - return RHS < *this; - } - bool operator<=(ILPValue RHS) const { - return (uint64_t)InstrCount * RHS.Cycles - <= (uint64_t)Cycles * RHS.InstrCount; - } - bool operator>=(ILPValue RHS) const { - return RHS <= *this; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void print(raw_ostream &OS) const; - - void dump() const; -#endif -}; - -/// \brief Compute the values of each DAG node for an ILP metric. -/// -/// This metric assumes that the DAG is a forest of trees with roots at the -/// bottom of the schedule. 
-class ScheduleDAGILP { - bool IsBottomUp; - std::vector ILPValues; - -public: - ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {} - - /// \brief Initialize the result data with the size of the DAG. - void resize(unsigned NumSUnits); - - /// \brief Compute the ILP metric for the subDAG at this root. - void computeILP(const SUnit *Root); - - /// \brief Get the ILP value for a DAG node. - ILPValue getILP(const SUnit *SU); -}; - -raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val); - -} // namespace llvm - -#endif diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 4bcd35a834c3..2219520ca19f 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -12,20 +12,18 @@ // //===----------------------------------------------------------------------===// -#ifndef SCHEDULEDAGINSTRS_H -#define SCHEDULEDAGINSTRS_H +#ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H +#define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/SparseMultiSet.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SparseSet.h" -#include namespace llvm { + class MachineFrameInfo; class MachineLoopInfo; class MachineDominatorTree; class LiveIntervals; @@ -48,55 +46,17 @@ namespace llvm { struct PhysRegSUOper { SUnit *SU; int OpIdx; + unsigned Reg; - PhysRegSUOper(SUnit *su, int op): SU(su), OpIdx(op) {} + PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {} + + unsigned getSparseSetIndex() const { return Reg; } }; - /// Combine a SparseSet with a 1x1 vector to track physical registers. 
- /// The SparseSet allows iterating over the (few) live registers for quickly - /// comparing against a regmask or clearing the set. - /// - /// Storage for the map is allocated once for the pass. The map can be - /// cleared between scheduling regions without freeing unused entries. - class Reg2SUnitsMap { - SparseSet PhysRegSet; - std::vector > SUnits; - public: - typedef SparseSet::const_iterator const_iterator; - - // Allow iteration over register numbers (keys) in the map. If needed, we - // can provide an iterator over SUnits (values) as well. - const_iterator reg_begin() const { return PhysRegSet.begin(); } - const_iterator reg_end() const { return PhysRegSet.end(); } - - /// Initialize the map with the number of registers. - /// If the map is already large enough, no allocation occurs. - /// For simplicity we expect the map to be empty(). - void setRegLimit(unsigned Limit); - - /// Returns true if the map is empty. - bool empty() const { return PhysRegSet.empty(); } - - /// Clear the map without deallocating storage. - void clear(); - - bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); } - - /// If this register is mapped, return its existing SUnits vector. - /// Otherwise map the register and return an empty SUnits vector. - std::vector &operator[](unsigned Reg) { - bool New = PhysRegSet.insert(Reg).second; - assert((!New || SUnits[Reg].empty()) && "stale SUnits vector"); - (void)New; - return SUnits[Reg]; - } - - /// Erase an existing element without freeing memory. - void erase(unsigned Reg) { - PhysRegSet.erase(Reg); - SUnits[Reg].clear(); - } - }; + /// Use a SparseMultiSet to track physical registers. Storage is only + /// allocated once for the pass. It can be cleared in constant time and reused + /// without any frees. + typedef SparseMultiSet, uint16_t> Reg2SUnitsMap; /// Use SparseSet as a SparseMap by relying on the fact that it never /// compares ValueT's, only unsigned keys. 
This allows the set to be cleared diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h new file mode 100644 index 000000000000..73ce99f4713d --- /dev/null +++ b/include/llvm/CodeGen/ScheduleDFS.h @@ -0,0 +1,196 @@ +//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Definition of an ILP metric for machine level instruction scheduling. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SCHEDULEDFS_H +#define LLVM_CODEGEN_SCHEDULEDFS_H + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/DataTypes.h" +#include + +namespace llvm { + +class raw_ostream; +class IntEqClasses; +class ScheduleDAGInstrs; +class SUnit; + +/// \brief Represent the ILP of the subDAG rooted at a DAG node. +/// +/// ILPValues summarize the DAG subtree rooted at each node. ILPValues are +/// valid for all nodes regardless of their subtree membership. +/// +/// When computed using bottom-up DFS, this metric assumes that the DAG is a +/// forest of trees with roots at the bottom of the schedule branching upward. +struct ILPValue { + unsigned InstrCount; + /// Length may either correspond to depth or height, depending on direction, + /// and cycles or nodes depending on context. + unsigned Length; + + ILPValue(unsigned count, unsigned length): + InstrCount(count), Length(length) {} + + // Order by the ILP metric's value. 
+ bool operator<(ILPValue RHS) const { + return (uint64_t)InstrCount * RHS.Length + < (uint64_t)Length * RHS.InstrCount; + } + bool operator>(ILPValue RHS) const { + return RHS < *this; + } + bool operator<=(ILPValue RHS) const { + return (uint64_t)InstrCount * RHS.Length + <= (uint64_t)Length * RHS.InstrCount; + } + bool operator>=(ILPValue RHS) const { + return RHS <= *this; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void print(raw_ostream &OS) const; + + void dump() const; +#endif +}; + +/// \brief Compute the values of each DAG node for various metrics during DFS. +class SchedDFSResult { + friend class SchedDFSImpl; + + static const unsigned InvalidSubtreeID = ~0u; + + /// \brief Per-SUnit data computed during DFS for various metrics. + /// + /// A node's SubtreeID is set to itself when it is visited to indicate that it + /// is the root of a subtree. Later it is set to its parent to indicate an + /// interior node. Finally, it is set to a representative subtree ID during + /// finalization. + struct NodeData { + unsigned InstrCount; + unsigned SubtreeID; + + NodeData(): InstrCount(0), SubtreeID(InvalidSubtreeID) {} + }; + + /// \brief Per-Subtree data computed during DFS. + struct TreeData { + unsigned ParentTreeID; + unsigned SubInstrCount; + + TreeData(): ParentTreeID(InvalidSubtreeID), SubInstrCount(0) {} + }; + + /// \brief Record a connection between subtrees and the connection level. + struct Connection { + unsigned TreeID; + unsigned Level; + + Connection(unsigned tree, unsigned level): TreeID(tree), Level(level) {} + }; + + bool IsBottomUp; + unsigned SubtreeLimit; + /// DFS results for each SUnit in this DAG. + std::vector DFSNodeData; + + // Store per-tree data indexed on tree ID, + SmallVector DFSTreeData; + + // For each subtree discovered during DFS, record its connections to other + // subtrees. + std::vector > SubtreeConnections; + + /// Cache the current connection level of each subtree. 
+ /// This mutable array is updated during scheduling. + std::vector SubtreeConnectLevels; + +public: + SchedDFSResult(bool IsBU, unsigned lim) + : IsBottomUp(IsBU), SubtreeLimit(lim) {} + + /// \brief Get the node cutoff before subtrees are considered significant. + unsigned getSubtreeLimit() const { return SubtreeLimit; } + + /// \brief Return true if this DFSResult is uninitialized. + /// + /// resize() initializes DFSResult, while compute() populates it. + bool empty() const { return DFSNodeData.empty(); } + + /// \brief Clear the results. + void clear() { + DFSNodeData.clear(); + DFSTreeData.clear(); + SubtreeConnections.clear(); + SubtreeConnectLevels.clear(); + } + + /// \brief Initialize the result data with the size of the DAG. + void resize(unsigned NumSUnits) { + DFSNodeData.resize(NumSUnits); + } + + /// \brief Compute various metrics for the DAG with given roots. + void compute(ArrayRef SUnits); + + /// \brief Get the number of instructions in the given subtree and its + /// children. + unsigned getNumInstrs(const SUnit *SU) const { + return DFSNodeData[SU->NodeNum].InstrCount; + } + + /// \brief Get the number of instructions in the given subtree not including + /// children. + unsigned getNumSubInstrs(unsigned SubtreeID) const { + return DFSTreeData[SubtreeID].SubInstrCount; + } + + /// \brief Get the ILP value for a DAG node. + /// + /// A leaf node has an ILP of 1/1. + ILPValue getILP(const SUnit *SU) const { + return ILPValue(DFSNodeData[SU->NodeNum].InstrCount, 1 + SU->getDepth()); + } + + /// \brief The number of subtrees detected in this DAG. + unsigned getNumSubtrees() const { return SubtreeConnectLevels.size(); } + + /// \brief Get the ID of the subtree the given DAG node belongs to. + /// + /// For convenience, if DFSResults have not been computed yet, give everything + /// tree ID 0. 
+ unsigned getSubtreeID(const SUnit *SU) const { + if (empty()) + return 0; + assert(SU->NodeNum < DFSNodeData.size() && "New Node"); + return DFSNodeData[SU->NodeNum].SubtreeID; + } + + /// \brief Get the connection level of a subtree. + /// + /// For bottom-up trees, the connection level is the latency depth (in cycles) + /// of the deepest connection to another subtree. + unsigned getSubtreeLevel(unsigned SubtreeID) const { + return SubtreeConnectLevels[SubtreeID]; + } + + /// \brief Scheduler callback to update SubtreeConnectLevels when a tree is + /// initially scheduled. + void scheduleTree(unsigned SubtreeID); +}; + +raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val); + +} // namespace llvm + +#endif diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h index 836b73a15a2f..51ac7f28527f 100644 --- a/include/llvm/CodeGen/SchedulerRegistry.h +++ b/include/llvm/CodeGen/SchedulerRegistry.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGENSCHEDULERREGISTRY_H -#define LLVM_CODEGENSCHEDULERREGISTRY_H +#ifndef LLVM_CODEGEN_SCHEDULERREGISTRY_H +#define LLVM_CODEGEN_SCHEDULERREGISTRY_H #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/Target/TargetMachine.h" diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h index 060e89a3fdc7..c2103fb233f8 100644 --- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h +++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h @@ -18,7 +18,6 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Support/DataTypes.h" - #include #include diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 619ee699430d..e5adf6724931 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -15,16 +15,17 @@ #ifndef LLVM_CODEGEN_SELECTIONDAG_H #define 
LLVM_CODEGEN_SELECTIONDAG_H -#include "llvm/ADT/ilist.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/ilist.h" +#include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Target/TargetMachine.h" #include -#include #include #include +#include namespace llvm { @@ -36,6 +37,7 @@ class SDNodeOrdering; class SDDbgValue; class TargetLowering; class TargetSelectionDAGInfo; +class TargetTransformInfo; template<> struct ilist_traits : public ilist_default_traits { private: @@ -111,13 +113,6 @@ public: DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); } }; -enum CombineLevel { - BeforeLegalizeTypes, - AfterLegalizeTypes, - AfterLegalizeVectorOps, - AfterLegalizeDAG -}; - class SelectionDAG; void checkForCycles(const SDNode *N); void checkForCycles(const SelectionDAG *DAG); @@ -137,6 +132,7 @@ class SelectionDAG { const TargetMachine &TM; const TargetLowering &TLI; const TargetSelectionDAGInfo &TSI; + const TargetTransformInfo *TTI; MachineFunction *MF; LLVMContext *Context; CodeGenOpt::Level OptLevel; @@ -232,7 +228,7 @@ public: /// init - Prepare this SelectionDAG to process code in the given /// MachineFunction. /// - void init(MachineFunction &mf); + void init(MachineFunction &mf, const TargetTransformInfo *TTI); /// clear - Clear state and free memory necessary to make this /// SelectionDAG ready to process a new block. @@ -243,6 +239,7 @@ public: const TargetMachine &getTarget() const { return TM; } const TargetLowering &getTargetLoweringInfo() const { return TLI; } const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return TSI; } + const TargetTransformInfo *getTargetTransformInfo() const { return TTI; } LLVMContext *getContext() const {return Context; } /// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'. 
@@ -570,7 +567,7 @@ public: SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps); SDValue getNode(unsigned Opcode, DebugLoc DL, - const std::vector &ResultTys, + ArrayRef ResultTys, const SDValue *Ops, unsigned NumOps); SDValue getNode(unsigned Opcode, DebugLoc DL, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps); @@ -834,7 +831,7 @@ public: MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps); MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, - const std::vector &ResultTys, const SDValue *Ops, + ArrayRef ResultTys, const SDValue *Ops, unsigned NumOps); MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, SDVTList VTs, const SDValue *Ops, unsigned NumOps); @@ -938,6 +935,20 @@ public: } } + /// Returns an APFloat semantics tag appropriate for the given type. If VT is + /// a vector type, the element semantics are returned. + static const fltSemantics &EVTToAPFloatSemantics(EVT VT) { + switch (VT.getScalarType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); + case MVT::f16: return APFloat::IEEEhalf; + case MVT::f32: return APFloat::IEEEsingle; + case MVT::f64: return APFloat::IEEEdouble; + case MVT::f80: return APFloat::x87DoubleExtended; + case MVT::f128: return APFloat::IEEEquad; + case MVT::ppcf128: return APFloat::PPCDoubleDouble; + } + } + /// AssignOrdering - Assign an order to the SDNode. void AssignOrdering(const SDNode *SD, unsigned Order); @@ -981,10 +992,8 @@ public: SDValue CreateStackTemporary(EVT VT1, EVT VT2); /// FoldConstantArithmetic - - SDValue FoldConstantArithmetic(unsigned Opcode, - EVT VT, - ConstantSDNode *Cst1, - ConstantSDNode *Cst2); + SDValue FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2); /// FoldSetCC - Constant fold a setcc to true or false. 
SDValue FoldSetCC(EVT VT, SDValue N1, diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index c42f6558007b..5f503deff10e 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SELECTIONDAG_ISEL_H -#define LLVM_CODEGEN_SELECTIONDAG_ISEL_H +#ifndef LLVM_CODEGEN_SELECTIONDAGISEL_H +#define LLVM_CODEGEN_SELECTIONDAGISEL_H -#include "llvm/BasicBlock.h" -#include "llvm/Pass.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Pass.h" namespace llvm { class FastISel; @@ -31,6 +31,7 @@ namespace llvm { class TargetLowering; class TargetLibraryInfo; class TargetInstrInfo; + class TargetTransformInfo; class FunctionLoweringInfo; class ScheduleHazardRecognizer; class GCFunctionInfo; @@ -44,6 +45,7 @@ public: const TargetMachine &TM; const TargetLowering &TLI; const TargetLibraryInfo *LibInfo; + const TargetTransformInfo *TTI; FunctionLoweringInfo *FuncInfo; MachineFunction *MF; MachineRegisterInfo *RegInfo; @@ -247,16 +249,26 @@ private: const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo); void PrepareEHLandingPad(); + + /// \brief Perform instruction selection on all basic blocks in the function. void SelectAllBasicBlocks(const Function &Fn); + + /// \brief Perform instruction selection on a single basic block, for + /// instructions between \p Begin and \p End. \p HadTailCall will be set + /// to true if a call in the block was translated as a tail call. 
+ void SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall); + bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst, FastISel *FastIS); void FinishBasicBlock(); - void SelectBasicBlock(BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall); void CodeGenAndEmitDAG(); - void LowerArguments(const BasicBlock *BB); + + /// \brief Generate instructions for lowering the incoming arguments of the + /// given function. + void LowerArguments(const Function &F); void ComputeLiveOutVRegInfo(); @@ -279,4 +291,4 @@ private: } -#endif /* LLVM_CODEGEN_SELECTIONDAG_ISEL_H */ +#endif /* LLVM_CODEGEN_SELECTIONDAGISEL_H */ diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 362e9afd225a..fef567f56bce 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -19,20 +19,20 @@ #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H #define LLVM_CODEGEN_SELECTIONDAGNODES_H -#include "llvm/Constants.h" -#include "llvm/Instructions.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/DebugLoc.h" +#include "llvm/Support/MathExtras.h" #include namespace llvm { @@ -49,7 +49,7 @@ template struct simplify_type; template struct ilist_traits; void checkForCycles(const SDNode *N); - + /// SDVTList - This represents a list of ValueType's that has been intern'd by /// a SelectionDAG. 
Instances of this simple value class are returned by /// SelectionDAG::getVTList(...). @@ -108,7 +108,7 @@ public: void setNode(SDNode *N) { Node = N; } inline SDNode *operator->() const { return Node; } - + bool operator==(const SDValue &O) const { return Node == O.Node && ResNo == O.ResNo; } @@ -130,6 +130,11 @@ public: /// inline EVT getValueType() const; + /// Return the simple ValueType of the referenced return value. + MVT getSimpleValueType() const { + return getValueType().getSimpleVT(); + } + /// getValueSizeInBits - Returns the size of the value in bits. /// unsigned getValueSizeInBits() const { @@ -191,14 +196,14 @@ template <> struct isPodLike { static const bool value = true; }; /// SDValues as if they were SDNode*'s. template<> struct simplify_type { typedef SDNode* SimpleType; - static SimpleType getSimplifiedValue(const SDValue &Val) { - return static_cast(Val.getNode()); + static SimpleType getSimplifiedValue(SDValue &Val) { + return Val.getNode(); } }; template<> struct simplify_type { - typedef SDNode* SimpleType; + typedef /*const*/ SDNode* SimpleType; static SimpleType getSimplifiedValue(const SDValue &Val) { - return static_cast(Val.getNode()); + return Val.getNode(); } }; @@ -290,14 +295,8 @@ private: /// SDValues as if they were SDNode*'s. template<> struct simplify_type { typedef SDNode* SimpleType; - static SimpleType getSimplifiedValue(const SDUse &Val) { - return static_cast(Val.getNode()); - } -}; -template<> struct simplify_type { - typedef SDNode* SimpleType; - static SimpleType getSimplifiedValue(const SDUse &Val) { - return static_cast(Val.getNode()); + static SimpleType getSimplifiedValue(SDUse &Val) { + return Val.getNode(); } }; @@ -525,7 +524,7 @@ public: /// NOTE: This is still very expensive. Use carefully. bool hasPredecessorHelper(const SDNode *N, SmallPtrSet &Visited, - SmallVector &Worklist) const; + SmallVector &Worklist) const; /// getNumOperands - Return the number of values used by this operation. 
/// @@ -595,6 +594,12 @@ public: return ValueList[ResNo]; } + /// Return the type of a specified result as a simple type. + /// + MVT getSimpleValueType(unsigned ResNo) const { + return getValueType(ResNo).getSimpleVT(); + } + /// getValueSizeInBits - Returns MVT::getSizeInBits(getValueType(ResNo)). /// unsigned getValueSizeInBits(unsigned ResNo) const { @@ -1287,7 +1292,7 @@ class ConstantPoolSDNode : public SDNode { : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { - assert((int)Offset >= 0 && "Offset is too large"); + assert(Offset >= 0 && "Offset is too large"); Val.ConstVal = c; } ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, @@ -1295,7 +1300,7 @@ class ConstantPoolSDNode : public SDNode { : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { - assert((int)Offset >= 0 && "Offset is too large"); + assert(Offset >= 0 && "Offset is too large"); Val.MachineCPVal = v; Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1); } @@ -1303,7 +1308,7 @@ public: bool isMachineConstantPoolEntry() const { - return (int)Offset < 0; + return Offset < 0; } const Constant *getConstVal() const { diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index c52599b0f6f9..a27708046686 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -19,13 +19,14 @@ #ifndef LLVM_CODEGEN_SLOTINDEXES_H #define LLVM_CODEGEN_SLOTINDEXES_H -#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/SmallVector.h" -#include 
"llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/Support/Allocator.h" namespace llvm { @@ -111,7 +112,7 @@ namespace llvm { return lie.getPointer(); } - int getIndex() const { + unsigned getIndex() const { return listEntry()->getIndex() | getSlot(); } @@ -359,6 +360,11 @@ namespace llvm { /// Renumber the index list, providing space for new instructions. void renumberIndexes(); + /// Repair indexes after adding and removing instructions. + void repairIndexesInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End); + /// Returns the zero index for this analysis. SlotIndex getZeroIndex() { assert(indexList.front().getIndex() == 0 && "First index is not 0?"); @@ -390,12 +396,16 @@ namespace llvm { return index.isValid() ? index.listEntry()->getInstr() : 0; } - /// Returns the next non-null index. - SlotIndex getNextNonNullIndex(SlotIndex index) { - IndexList::iterator itr(index.listEntry()); - ++itr; - while (itr != indexList.end() && itr->getInstr() == 0) { ++itr; } - return SlotIndex(itr, index.getSlot()); + /// Returns the next non-null index, if one exists. + /// Otherwise returns getLastIndex(). + SlotIndex getNextNonNullIndex(SlotIndex Index) { + IndexList::iterator I = Index.listEntry(); + IndexList::iterator E = indexList.end(); + while (++I != E) + if (I->getInstr()) + return SlotIndex(I, Index.getSlot()); + // We reached the end of the function. 
+ return getLastIndex(); } /// getIndexBefore - Returns the index of the last indexed instruction @@ -601,29 +611,35 @@ namespace llvm { void insertMBBInMaps(MachineBasicBlock *mbb) { MachineFunction::iterator nextMBB = llvm::next(MachineFunction::iterator(mbb)); - IndexListEntry *startEntry = createEntry(0, 0); - IndexListEntry *stopEntry = createEntry(0, 0); - IndexListEntry *nextEntry = 0; + IndexListEntry *startEntry = 0; + IndexListEntry *endEntry = 0; + IndexList::iterator newItr; if (nextMBB == mbb->getParent()->end()) { - nextEntry = indexList.end(); + startEntry = &indexList.back(); + endEntry = createEntry(0, 0); + newItr = indexList.insertAfter(startEntry, endEntry); } else { - nextEntry = getMBBStartIdx(nextMBB).listEntry(); + startEntry = createEntry(0, 0); + endEntry = getMBBStartIdx(nextMBB).listEntry(); + newItr = indexList.insert(endEntry, startEntry); } - indexList.insert(nextEntry, startEntry); - indexList.insert(nextEntry, stopEntry); - SlotIndex startIdx(startEntry, SlotIndex::Slot_Block); - SlotIndex endIdx(nextEntry, SlotIndex::Slot_Block); + SlotIndex endIdx(endEntry, SlotIndex::Slot_Block); + + MachineFunction::iterator prevMBB(mbb); + assert(prevMBB != mbb->getParent()->end() && + "Can't insert a new block at the beginning of a function."); + --prevMBB; + MBBRanges[prevMBB->getNumber()].second = startIdx; assert(unsigned(mbb->getNumber()) == MBBRanges.size() && "Blocks must be added in order"); MBBRanges.push_back(std::make_pair(startIdx, endIdx)); - idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb)); - renumberIndexes(); + renumberIndexes(newItr); std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); } @@ -631,17 +647,8 @@ namespace llvm { // Specialize IntervalMapInfo for half-open slot index intervals. 
- template struct IntervalMapInfo; - template <> struct IntervalMapInfo { - static inline bool startLess(const SlotIndex &x, const SlotIndex &a) { - return x < a; - } - static inline bool stopLess(const SlotIndex &b, const SlotIndex &x) { - return b <= x; - } - static inline bool adjacent(const SlotIndex &a, const SlotIndex &b) { - return a == b; - } + template <> + struct IntervalMapInfo : IntervalMapHalfOpenInfo { }; } diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 9849e92f7dec..e7098e48bf06 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/SectionKind.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/ADT/StringRef.h" namespace llvm { class MachineModuleInfo; @@ -55,13 +55,12 @@ public: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const; - /// getExprForDwarfGlobalReference - Return an MCExpr to use for a reference - /// to the specified global variable from exception handling information. - /// + /// getTTypeGlobalReference - Return an MCExpr to use for a reference to the + /// specified type info global variable from exception handling information. virtual const MCExpr * - getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const; + getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const; // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality. 
virtual MCSymbol * @@ -103,12 +102,12 @@ public: virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *) const; - /// getExprForDwarfGlobalReference - The mach-o version of this method + /// getTTypeGlobalReference - The mach-o version of this method /// defaults to returning a stub reference. virtual const MCExpr * - getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const; + getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const; // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality. virtual MCSymbol * diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 88e6105a7de2..3e22252eeac1 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -13,13 +13,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_TARGETSCHEDMODEL_H -#define LLVM_TARGET_TARGETSCHEDMODEL_H +#ifndef LLVM_CODEGEN_TARGETSCHEDULE_H +#define LLVM_CODEGEN_TARGETSCHEDULE_H -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Target/TargetSubtargetInfo.h" namespace llvm { @@ -84,6 +84,9 @@ public: /// \brief Maximum number of micro-ops that may be scheduled per cycle. unsigned getIssueWidth() const { return SchedModel.IssueWidth; } + /// \brief Number of cycles the OOO processor is expected to hide. + unsigned getILPWindow() const { return SchedModel.ILPWindow; } + /// \brief Return the number of issue slots required for this MI. 
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC = 0) const; diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index 240199291ae9..ec48b67b993c 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -33,6 +33,10 @@ namespace llvm { class MVT { public: enum SimpleValueType { + // INVALID_SIMPLE_VALUE_TYPE - Simple value types less than zero are + // considered extended value types. + INVALID_SIMPLE_VALUE_TYPE = -1, + // If you change this numbering, you must change the values in // ValueTypes.td as well! Other = 0, // This is a non-standard value @@ -60,52 +64,61 @@ namespace llvm { v4i1 = 14, // 4 x i1 v8i1 = 15, // 8 x i1 v16i1 = 16, // 16 x i1 - v2i8 = 17, // 2 x i8 - v4i8 = 18, // 4 x i8 - v8i8 = 19, // 8 x i8 - v16i8 = 20, // 16 x i8 - v32i8 = 21, // 32 x i8 - v1i16 = 22, // 1 x i16 - v2i16 = 23, // 2 x i16 - v4i16 = 24, // 4 x i16 - v8i16 = 25, // 8 x i16 - v16i16 = 26, // 16 x i16 - v1i32 = 27, // 1 x i32 - v2i32 = 28, // 2 x i32 - v4i32 = 29, // 4 x i32 - v8i32 = 30, // 8 x i32 - v16i32 = 31, // 16 x i32 - v1i64 = 32, // 1 x i64 - v2i64 = 33, // 2 x i64 - v4i64 = 34, // 4 x i64 - v8i64 = 35, // 8 x i64 - v16i64 = 36, // 16 x i64 + v32i1 = 17, // 32 x i1 + v64i1 = 18, // 64 x i1 - v2f16 = 37, // 2 x f16 - v2f32 = 38, // 2 x f32 - v4f32 = 39, // 4 x f32 - v8f32 = 40, // 8 x f32 - v2f64 = 41, // 2 x f64 - v4f64 = 42, // 4 x f64 + v2i8 = 19, // 2 x i8 + v4i8 = 20, // 4 x i8 + v8i8 = 21, // 8 x i8 + v16i8 = 22, // 16 x i8 + v32i8 = 23, // 32 x i8 + v64i8 = 24, // 64 x i8 + v1i16 = 25, // 1 x i16 + v2i16 = 26, // 2 x i16 + v4i16 = 27, // 4 x i16 + v8i16 = 28, // 8 x i16 + v16i16 = 29, // 16 x i16 + v32i16 = 30, // 32 x i16 + v1i32 = 31, // 1 x i32 + v2i32 = 32, // 2 x i32 + v4i32 = 33, // 4 x i32 + v8i32 = 34, // 8 x i32 + v16i32 = 35, // 16 x i32 + v1i64 = 36, // 1 x i64 + v2i64 = 37, // 2 x i64 + v4i64 = 38, // 4 x i64 + v8i64 = 39, // 8 x i64 + v16i64 = 40, // 16 x i64 
- FIRST_VECTOR_VALUETYPE = v2i1, - LAST_VECTOR_VALUETYPE = v4f64, FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, LAST_INTEGER_VECTOR_VALUETYPE = v16i64, + + v2f16 = 41, // 2 x f16 + v2f32 = 42, // 2 x f32 + v4f32 = 43, // 4 x f32 + v8f32 = 44, // 8 x f32 + v16f32 = 45, // 16 x f32 + v2f64 = 46, // 2 x f64 + v4f64 = 47, // 4 x f64 + v8f64 = 48, // 8 x f64 + FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = v4f64, + LAST_FP_VECTOR_VALUETYPE = v8f64, - x86mmx = 43, // This is an X86 MMX value + FIRST_VECTOR_VALUETYPE = v2i1, + LAST_VECTOR_VALUETYPE = v8f64, - Glue = 44, // This glues nodes together during pre-RA sched + x86mmx = 49, // This is an X86 MMX value - isVoid = 45, // This has no value + Glue = 50, // This glues nodes together during pre-RA sched - Untyped = 46, // This value takes a register, but has + isVoid = 51, // This has no value + + Untyped = 52, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - LAST_VALUETYPE = 47, // This always remains at the end of the list. + LAST_VALUETYPE = 53, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -137,14 +150,7 @@ namespace llvm { // iPTR - An int value the size of the pointer of the current // target. This should only be used internal to tblgen! - iPTR = 255, - - // LastSimpleValueType - The greatest valid SimpleValueType value. - LastSimpleValueType = 255, - - // INVALID_SIMPLE_VALUE_TYPE - Simple value types greater than or equal - // to this are considered extended value types. - INVALID_SIMPLE_VALUE_TYPE = LastSimpleValueType + 1 + iPTR = 255 }; SimpleValueType SimpleTy; @@ -216,7 +222,9 @@ namespace llvm { /// is512BitVector - Return true if this is a 512-bit vector type. 
bool is512BitVector() const { - return (SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32); + return (SimpleTy == MVT::v8f64 || SimpleTy == MVT::v16f32 || + SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 || + SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32); } /// is1024BitVector - Return true if this is a 1024-bit vector type. @@ -254,17 +262,21 @@ namespace llvm { case v2i1 : case v4i1 : case v8i1 : - case v16i1: return i1; + case v16i1 : + case v32i1 : + case v64i1: return i1; case v2i8 : case v4i8 : case v8i8 : case v16i8: - case v32i8: return i8; + case v32i8: + case v64i8: return i8; case v1i16: case v2i16: case v4i16: case v8i16: - case v16i16: return i16; + case v16i16: + case v32i16: return i16; case v1i32: case v2i32: case v4i32: @@ -278,9 +290,11 @@ namespace llvm { case v2f16: return f16; case v2f32: case v4f32: - case v8f32: return f32; + case v8f32: + case v16f32: return f32; case v2f64: - case v4f64: return f64; + case v4f64: + case v8f64: return f64; } } @@ -288,18 +302,24 @@ namespace llvm { switch (SimpleTy) { default: llvm_unreachable("Not a vector MVT!"); - case v32i8: return 32; + case v32i1: + case v32i8: + case v32i16: return 32; + case v64i1: + case v64i8: return 64; case v16i1: case v16i8: case v16i16: case v16i32: - case v16i64:return 16; - case v8i1: + case v16i64: + case v16f32: return 16; + case v8i1 : case v8i8 : case v8i16: case v8i32: case v8i64: - case v8f32: return 8; + case v8f32: + case v8f64: return 8; case v4i1: case v4i8: case v4i16: @@ -328,7 +348,10 @@ namespace llvm { case iPTRAny: case iAny: case fAny: + case vAny: llvm_unreachable("Value type is overloaded."); + case Metadata: + llvm_unreachable("Value type is metadata."); default: llvm_unreachable("getSizeInBits called on extended MVT."); case i1 : return 1; @@ -343,13 +366,15 @@ namespace llvm { case v1i16: return 16; case f32 : case i32 : + case v32i1: case v4i8: case v2i16: - case v2f16: + case v2f16: case v1i32: return 32; case x86mmx: case f64 : case i64 : + 
case v64i1: case v8i8: case v4i16: case v2i32: @@ -371,8 +396,12 @@ namespace llvm { case v4i64: case v8f32: case v4f64: return 256; + case v64i8: + case v32i16: case v16i32: - case v8i64: return 512; + case v8i64: + case v16f32: + case v8f64: return 512; case v16i64:return 1024; } } @@ -389,6 +418,27 @@ namespace llvm { return getStoreSize() * 8; } + /// Return true if this has more bits than VT. + bool bitsGT(MVT VT) const { + return getSizeInBits() > VT.getSizeInBits(); + } + + /// Return true if this has no less bits than VT. + bool bitsGE(MVT VT) const { + return getSizeInBits() >= VT.getSizeInBits(); + } + + /// Return true if this has less bits than VT. + bool bitsLT(MVT VT) const { + return getSizeInBits() < VT.getSizeInBits(); + } + + /// Return true if this has no more bits than VT. + bool bitsLE(MVT VT) const { + return getSizeInBits() <= VT.getSizeInBits(); + } + + static MVT getFloatingPointVT(unsigned BitWidth) { switch (BitWidth) { default: @@ -434,6 +484,8 @@ namespace llvm { if (NumElements == 4) return MVT::v4i1; if (NumElements == 8) return MVT::v8i1; if (NumElements == 16) return MVT::v16i1; + if (NumElements == 32) return MVT::v32i1; + if (NumElements == 64) return MVT::v64i1; break; case MVT::i8: if (NumElements == 2) return MVT::v2i8; @@ -441,6 +493,7 @@ namespace llvm { if (NumElements == 8) return MVT::v8i8; if (NumElements == 16) return MVT::v16i8; if (NumElements == 32) return MVT::v32i8; + if (NumElements == 64) return MVT::v64i8; break; case MVT::i16: if (NumElements == 1) return MVT::v1i16; @@ -448,6 +501,7 @@ namespace llvm { if (NumElements == 4) return MVT::v4i16; if (NumElements == 8) return MVT::v8i16; if (NumElements == 16) return MVT::v16i16; + if (NumElements == 32) return MVT::v32i16; break; case MVT::i32: if (NumElements == 1) return MVT::v1i32; @@ -470,14 +524,22 @@ namespace llvm { if (NumElements == 2) return MVT::v2f32; if (NumElements == 4) return MVT::v4f32; if (NumElements == 8) return MVT::v8f32; + if (NumElements == 
16) return MVT::v16f32; break; case MVT::f64: if (NumElements == 2) return MVT::v2f64; if (NumElements == 4) return MVT::v4f64; + if (NumElements == 8) return MVT::v8f64; break; } return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); } + + /// Return the value type corresponding to the specified type. This returns + /// all pointers as iPTR. If HandleUnknown is true, unknown types are + /// returned as Other, otherwise they are invalid. + static MVT getVT(Type *Ty, bool HandleUnknown = false); + }; @@ -501,7 +563,7 @@ namespace llvm { bool operator!=(EVT VT) const { if (V.SimpleTy != VT.V.SimpleTy) return true; - if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + if (V.SimpleTy < 0) return LLVMTy != VT.LLVMTy; return false; } @@ -517,7 +579,7 @@ namespace llvm { /// number of bits. static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) { MVT M = MVT::getIntegerVT(BitWidth); - if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) + if (M.SimpleTy >= 0) return M; return getExtendedIntegerVT(Context, BitWidth); } @@ -526,7 +588,7 @@ namespace llvm { /// length, where each element is of type VT. static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) { MVT M = MVT::getVectorVT(VT.V, NumElements); - if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) + if (M.SimpleTy >= 0) return M; return getExtendedVectorVT(Context, VT, NumElements); } @@ -541,7 +603,7 @@ namespace llvm { unsigned BitWidth = EltTy.getSizeInBits(); MVT IntTy = MVT::getIntegerVT(BitWidth); MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements()); - assert(VecTy != MVT::INVALID_SIMPLE_VALUE_TYPE && + assert(VecTy.SimpleTy >= 0 && "Simple vector VT not representable by simple integer vector VT!"); return VecTy; } @@ -549,7 +611,7 @@ namespace llvm { /// isSimple - Test if the given EVT is simple (as opposed to being /// extended). 
bool isSimple() const { - return V.SimpleTy <= MVT::LastSimpleValueType; + return V.SimpleTy >= 0; } /// isExtended - Test if the given EVT is extended (as opposed to @@ -765,7 +827,7 @@ namespace llvm { /// types are returned as Other, otherwise they are invalid. static EVT getEVT(Type *Ty, bool HandleUnknown = false); - intptr_t getRawBits() { + intptr_t getRawBits() const { if (isSimple()) return V.SimpleTy; else diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index a707f887aaf4..76df6ac8e65b 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -37,39 +37,45 @@ def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value def v16i1 : ValueType<16, 16>; // 16 x i1 vector value -def v2i8 : ValueType<16 , 17>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 18>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 19>; // 8 x i8 vector value -def v16i8 : ValueType<128, 20>; // 16 x i8 vector value -def v32i8 : ValueType<256, 21>; // 32 x i8 vector value -def v1i16 : ValueType<16 , 22>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 23>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 24>; // 4 x i16 vector value -def v8i16 : ValueType<128, 25>; // 8 x i16 vector value -def v16i16 : ValueType<256, 26>; // 16 x i16 vector value -def v1i32 : ValueType<32 , 27>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 28>; // 2 x i32 vector value -def v4i32 : ValueType<128, 29>; // 4 x i32 vector value -def v8i32 : ValueType<256, 30>; // 8 x i32 vector value -def v16i32 : ValueType<512, 31>; // 16 x i32 vector value -def v1i64 : ValueType<64 , 32>; // 1 x i64 vector value -def v2i64 : ValueType<128, 33>; // 2 x i64 vector value -def v4i64 : ValueType<256, 34>; // 4 x i64 vector value -def v8i64 : ValueType<512, 35>; // 8 x i64 vector value -def v16i64 : ValueType<1024,36>; // 16 x i64 vector 
value +def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value +def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value +def v2i8 : ValueType<16 , 19>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 20>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 21>; // 8 x i8 vector value +def v16i8 : ValueType<128, 22>; // 16 x i8 vector value +def v32i8 : ValueType<256, 23>; // 32 x i8 vector value +def v64i8 : ValueType<512, 24>; // 64 x i8 vector value +def v1i16 : ValueType<16 , 25>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 26>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 27>; // 4 x i16 vector value +def v8i16 : ValueType<128, 28>; // 8 x i16 vector value +def v16i16 : ValueType<256, 29>; // 16 x i16 vector value +def v32i16 : ValueType<512, 30>; // 32 x i16 vector value +def v1i32 : ValueType<32 , 31>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 32>; // 2 x i32 vector value +def v4i32 : ValueType<128, 33>; // 4 x i32 vector value +def v8i32 : ValueType<256, 34>; // 8 x i32 vector value +def v16i32 : ValueType<512, 35>; // 16 x i32 vector value +def v1i64 : ValueType<64 , 36>; // 1 x i64 vector value +def v2i64 : ValueType<128, 37>; // 2 x i64 vector value +def v4i64 : ValueType<256, 38>; // 4 x i64 vector value +def v8i64 : ValueType<512, 39>; // 8 x i64 vector value +def v16i64 : ValueType<1024,40>; // 16 x i64 vector value -def v2f16 : ValueType<32 , 37>; // 2 x f16 vector value -def v2f32 : ValueType<64 , 38>; // 2 x f32 vector value -def v4f32 : ValueType<128, 39>; // 4 x f32 vector value -def v8f32 : ValueType<256, 40>; // 8 x f32 vector value -def v2f64 : ValueType<128, 41>; // 2 x f64 vector value -def v4f64 : ValueType<256, 42>; // 4 x f64 vector value +def v2f16 : ValueType<32 , 41>; // 2 x f16 vector value +def v2f32 : ValueType<64 , 42>; // 2 x f32 vector value +def v4f32 : ValueType<128, 43>; // 4 x f32 vector value +def v8f32 : ValueType<256, 44>; // 8 x f32 vector value +def v16f32 : ValueType<512, 45>; // 16 x f32 vector
value +def v2f64 : ValueType<128, 46>; // 2 x f64 vector value +def v4f64 : ValueType<256, 47>; // 4 x f64 vector value +def v8f64 : ValueType<512, 48>; // 8 x f64 vector value -def x86mmx : ValueType<64 , 43>; // X86 MMX value -def FlagVT : ValueType<0 , 44>; // Pre-RA sched glue -def isVoid : ValueType<0 , 45>; // Produces no value -def untyped: ValueType<8 , 46>; // Produces an untyped value +def x86mmx : ValueType<64 , 49>; // X86 MMX value +def FlagVT : ValueType<0 , 50>; // Pre-RA sched glue +def isVoid : ValueType<0 , 51>; // Produces no value +def untyped: ValueType<8 , 52>; // Produces an untyped value def MetadataVT: ValueType<0, 250>; // Metadata // Pseudo valuetype mapped to the current pointer size to any address space. diff --git a/lib/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h similarity index 95% rename from lib/CodeGen/VirtRegMap.h rename to include/llvm/CodeGen/VirtRegMap.h index 7974dda66a5f..3bc6ebd563f2 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -17,9 +17,9 @@ #ifndef LLVM_CODEGEN_VIRTREGMAP_H #define LLVM_CODEGEN_VIRTREGMAP_H +#include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/IndexedMap.h" namespace llvm { class MachineInstr; @@ -126,13 +126,13 @@ namespace llvm { grow(); } - /// @brief returns the register allocation preference. - unsigned getRegAllocPref(unsigned virtReg); - /// @brief returns true if VirtReg is assigned to its preferred physreg. - bool hasPreferredPhys(unsigned VirtReg) { - return getPhys(VirtReg) == getRegAllocPref(VirtReg); - } + bool hasPreferredPhys(unsigned VirtReg); + + /// @brief returns true if VirtReg has a known preferred register. + /// This returns false if VirtReg has a preference that is a virtual + /// register that hasn't been assigned yet. + bool hasKnownPreference(unsigned VirtReg); /// @brief records virtReg is a split live interval from SReg. 
void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index ca6412472991..0a2685739782 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -6,6 +6,9 @@ /* Bug report URL. */ #define BUG_REPORT_URL "${BUG_REPORT_URL}" +/* Define if we have libxml2 */ +#cmakedefine CLANG_HAVE_LIBXML ${CLANG_HAVE_LIBXML} + /* Relative directory for resource files */ #define CLANG_RESOURCE_DIR "${CLANG_RESOURCE_DIR}" @@ -69,7 +72,7 @@ /* Define to 1 if you have the header file. */ #undef HAVE_CRASHREPORTERCLIENT_H -/* Define if __crashreporter_info__ exists. */ +/* can use __crashreporter_info__ */ #undef HAVE_CRASHREPORTER_INFO /* Define to 1 if you have the header file. */ @@ -143,6 +146,24 @@ /* Define to 1 if you have the `floorf' function. */ #cmakedefine HAVE_FLOORF ${HAVE_FLOORF} +/* Define to 1 if you have the `log' function. */ +#cmakedefine HAVE_LOG ${HAVE_LOG} + +/* Define to 1 if you have the `log2' function. */ +#cmakedefine HAVE_LOG2 ${HAVE_LOG2} + +/* Define to 1 if you have the `log10' function. */ +#cmakedefine HAVE_LOG10 ${HAVE_LOG10} + +/* Define to 1 if you have the `exp' function. */ +#cmakedefine HAVE_EXP ${HAVE_EXP} + +/* Define to 1 if you have the `exp2' function. */ +#cmakedefine HAVE_EXP2 ${HAVE_EXP2} + +/* Define to 1 if you have the `exp10' function. */ +#cmakedefine HAVE_EXP10 ${HAVE_EXP10} + /* Define to 1 if you have the `fmodf' function. */ #cmakedefine HAVE_FMODF ${HAVE_FMODF} diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index a4f8af4db028..5a3d02c553ee 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -72,12 +72,20 @@ /* Define to 1 if you have the header file. */ #undef HAVE_CRASHREPORTERCLIENT_H -/* Define if __crashreporter_info__ exists. */ +/* can use __crashreporter_info__ */ #undef HAVE_CRASHREPORTER_INFO /* Define to 1 if you have the header file.
*/ #undef HAVE_CTYPE_H +/* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you + don't. */ +#undef HAVE_DECL_FE_ALL_EXCEPT + +/* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you + don't. */ +#undef HAVE_DECL_FE_INEXACT + /* Define to 1 if you have the declaration of `strerror_s', and to 0 if you don't. */ #undef HAVE_DECL_STRERROR_S @@ -122,6 +130,12 @@ /* Define to 1 if you have the header file. */ #undef HAVE_EXECINFO_H +/* Define to 1 if you have the `exp' function. */ +#undef HAVE_EXP + +/* Define to 1 if you have the `exp2' function. */ +#undef HAVE_EXP2 + /* Define to 1 if you have the header file. */ #undef HAVE_FCNTL_H @@ -225,6 +239,15 @@ the current directory to the dynamic linker search path. */ #undef HAVE_LINK_R +/* Define to 1 if you have the `log' function. */ +#undef HAVE_LOG + +/* Define to 1 if you have the `log10' function. */ +#undef HAVE_LOG10 + +/* Define to 1 if you have the `log2' function. */ +#undef HAVE_LOG2 + /* Define to 1 if you have the `longjmp' function. */ #undef HAVE_LONGJMP diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake index 39442926dc9b..eda17ee4a62b 100644 --- a/include/llvm/Config/llvm-config.h.cmake +++ b/include/llvm/Config/llvm-config.h.cmake @@ -112,10 +112,19 @@ /* Installation prefix directory */ #cmakedefine LLVM_PREFIX "${LLVM_PREFIX}" +/* Define if we have the Intel JIT API runtime support library */ +#cmakedefine LLVM_USE_INTEL_JITEVENTS 1 + +/* Define if we have the oprofile JIT-support library */ +#cmakedefine LLVM_USE_OPROFILE 1 + /* Major version of the LLVM API */ #cmakedefine LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR} /* Minor version of the LLVM API */ #cmakedefine LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR} +/* Define to 1 if you have the header file. 
*/ +#cmakedefine HAVE_SANITIZER_MSAN_INTERFACE_H ${HAVE_SANITIZER_MSAN_INTERFACE_H} + #endif diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in index 9489dfe01633..af3a32485525 100644 --- a/include/llvm/Config/llvm-config.h.in +++ b/include/llvm/Config/llvm-config.h.in @@ -112,6 +112,12 @@ /* Installation prefix directory */ #undef LLVM_PREFIX +/* Define if we have the Intel JIT API runtime support library */ +#undef LLVM_USE_INTEL_JITEVENTS + +/* Define if we have the oprofile JIT-support library */ +#undef LLVM_USE_OPROFILE + /* Major version of the LLVM API */ #undef LLVM_VERSION_MAJOR diff --git a/include/llvm/DIBuilder.h b/include/llvm/DIBuilder.h index 2f0780068087..4f0aa07130ef 100644 --- a/include/llvm/DIBuilder.h +++ b/include/llvm/DIBuilder.h @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DIBUILDER_H -#define LLVM_ANALYSIS_DIBUILDER_H +#ifndef LLVM_DIBUILDER_H +#define LLVM_DIBUILDER_H -#include "llvm/Support/DataTypes.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class BasicBlock; @@ -28,6 +28,9 @@ namespace llvm { class LLVMContext; class MDNode; class StringRef; + class DIBasicType; + class DICompositeType; + class DIDerivedType; class DIDescriptor; class DIFile; class DIEnumerator; @@ -88,9 +91,12 @@ namespace llvm { /// by a tool analyzing generated debugging information. /// @param RV This indicates runtime version for languages like /// Objective-C. + /// @param SplitName The name of the file that we'll split debug info out + /// into. 
void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir, - StringRef Producer, - bool isOptimized, StringRef Flags, unsigned RV); + StringRef Producer, bool isOptimized, + StringRef Flags, unsigned RV, + StringRef SplitName = StringRef()); /// createFile - Create a file descriptor to hold debugging information /// for a file. @@ -108,27 +114,32 @@ namespace llvm { /// @param SizeInBits Size of the type. /// @param AlignInBits Type alignment. /// @param Encoding DWARF encoding code, e.g. dwarf::DW_ATE_float. - DIType createBasicType(StringRef Name, uint64_t SizeInBits, - uint64_t AlignInBits, unsigned Encoding); + DIBasicType createBasicType(StringRef Name, uint64_t SizeInBits, + uint64_t AlignInBits, unsigned Encoding); /// createQualifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. /// @param Tag Tag identifing type, e.g. dwarf::TAG_volatile_type /// @param FromTy Base Type. - DIType createQualifiedType(unsigned Tag, DIType FromTy); + DIDerivedType createQualifiedType(unsigned Tag, DIType FromTy); /// createPointerType - Create debugging information entry for a pointer. /// @param PointeeTy Type pointed by this pointer. /// @param SizeInBits Size. /// @param AlignInBits Alignment. (optional) /// @param Name Pointer type name. (optional) - DIType createPointerType(DIType PointeeTy, uint64_t SizeInBits, - uint64_t AlignInBits = 0, - StringRef Name = StringRef()); + DIDerivedType + createPointerType(DIType PointeeTy, uint64_t SizeInBits, + uint64_t AlignInBits = 0, StringRef Name = StringRef()); + + /// \brief Create debugging information entry for a pointer to member. + /// @param PointeeTy Type pointed to by this pointer. + /// @param Class Type for which this pointer points to members of. + DIDerivedType createMemberPointerType(DIType PointeeTy, DIType Class); /// createReferenceType - Create debugging information entry for a c++ /// style reference or rvalue reference type. 
- DIType createReferenceType(unsigned Tag, DIType RTy); + DIDerivedType createReferenceType(unsigned Tag, DIType RTy); /// createTypedef - Create debugging information entry for a typedef. /// @param Ty Original type. @@ -136,8 +147,8 @@ namespace llvm { /// @param File File where this type is defined. /// @param LineNo Line number. /// @param Context The surrounding context for the typedef. - DIType createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo, DIDescriptor Context); + DIDerivedType createTypedef(DIType Ty, StringRef Name, DIFile File, + unsigned LineNo, DIDescriptor Context); /// createFriend - Create debugging information entry for a 'friend'. DIType createFriend(DIType Ty, DIType FriendTy); @@ -149,8 +160,8 @@ namespace llvm { /// @param BaseOffset Base offset. /// @param Flags Flags to describe inheritance attribute, /// e.g. private - DIType createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, - unsigned Flags); + DIDerivedType createInheritance(DIType Ty, DIType BaseTy, + uint64_t BaseOffset, unsigned Flags); /// createMemberType - Create debugging information entry for a member. /// @param Scope Member scope. @@ -162,10 +173,23 @@ namespace llvm { /// @param OffsetInBits Member offset. /// @param Flags Flags to encode member attribute, e.g. private /// @param Ty Parent type. - DIType createMemberType(DIDescriptor Scope, StringRef Name, DIFile File, - unsigned LineNo, uint64_t SizeInBits, - uint64_t AlignInBits, uint64_t OffsetInBits, - unsigned Flags, DIType Ty); + DIDerivedType + createMemberType(DIDescriptor Scope, StringRef Name, DIFile File, + unsigned LineNo, uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, DIType Ty); + + /// createStaticMemberType - Create debugging information entry for a + /// C++ static data member. + /// @param Scope Member scope. + /// @param Name Member name. + /// @param File File where this member is declared. + /// @param LineNo Line number. 
+ /// @param Ty Type of the static member. + /// @param Flags Flags to encode member attribute, e.g. private. + /// @param Val Const initializer of the member. + DIType createStaticMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNo, DIType Ty, + unsigned Flags, llvm::Value *Val); /// createObjCIVar - Create debugging information entry for Objective-C /// instance variable. @@ -241,12 +265,13 @@ namespace llvm { /// DW_AT_containing_type. See DWARF documentation /// for more info. /// @param TemplateParms Template type parameters. - DIType createClassType(DIDescriptor Scope, StringRef Name, DIFile File, - unsigned LineNumber, uint64_t SizeInBits, - uint64_t AlignInBits, uint64_t OffsetInBits, - unsigned Flags, DIType DerivedFrom, - DIArray Elements, MDNode *VTableHolder = 0, - MDNode *TemplateParms = 0); + DICompositeType createClassType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType DerivedFrom, DIArray Elements, + MDNode *VTableHolder = 0, + MDNode *TemplateParms = 0); /// createStructType - Create debugging information entry for a struct. /// @param Scope Scope in which this struct is defined. @@ -258,10 +283,12 @@ namespace llvm { /// @param Flags Flags to encode member attribute, e.g. private /// @param Elements Struct elements. /// @param RunTimeLang Optional parameter, Objective-C runtime version. 
- DIType createStructType(DIDescriptor Scope, StringRef Name, DIFile File, - unsigned LineNumber, uint64_t SizeInBits, - uint64_t AlignInBits, unsigned Flags, - DIArray Elements, unsigned RunTimeLang = 0); + DICompositeType createStructType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + unsigned Flags, DIType DerivedFrom, + DIArray Elements, unsigned RunTimeLang = 0, + MDNode *VTableHolder = 0); /// createUnionType - Create debugging information entry for an union. /// @param Scope Scope in which this union is defined. @@ -273,10 +300,10 @@ namespace llvm { /// @param Flags Flags to encode member attribute, e.g. private /// @param Elements Union elements. /// @param RunTimeLang Optional parameter, Objective-C runtime version. - DIType createUnionType(DIDescriptor Scope, StringRef Name, DIFile File, - unsigned LineNumber, uint64_t SizeInBits, - uint64_t AlignInBits, unsigned Flags, - DIArray Elements, unsigned RunTimeLang = 0); + DICompositeType createUnionType( + DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, + DIArray Elements, unsigned RunTimeLang = 0); /// createTemplateTypeParameter - Create debugging information for template /// type parameter. @@ -311,8 +338,8 @@ namespace llvm { /// @param AlignInBits Alignment. /// @param Ty Element type. /// @param Subscripts Subscripts. - DIType createArrayType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts); + DICompositeType createArrayType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts); /// createVectorType - Create debugging information entry for a vector type. /// @param Size Array size. @@ -331,16 +358,16 @@ namespace llvm { /// @param SizeInBits Member size. /// @param AlignInBits Member alignment. /// @param Elements Enumeration elements. 
- DIType createEnumerationType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - DIArray Elements, DIType ClassType); + DICompositeType createEnumerationType( + DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, + DIType ClassType); /// createSubroutineType - Create subroutine type. /// @param File File in which this subroutine is defined. /// @param ParameterTypes An array of subroutine parameter types. This /// includes return type at 0th index. - DIType createSubroutineType(DIFile File, DIArray ParameterTypes); + DICompositeType createSubroutineType(DIFile File, DIArray ParameterTypes); /// createArtificialType - Create a new DIType with "artificial" flag set. DIType createArtificialType(DIType Ty); @@ -349,10 +376,6 @@ namespace llvm { /// flag set. DIType createObjectPointerType(DIType Ty); - /// createTemporaryType - Create a temporary forward-declared type. - DIType createTemporaryType(); - DIType createTemporaryType(DIFile F); - /// createForwardDecl - Create a temporary forward-declared type. DIType createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line, unsigned RuntimeLang = 0, @@ -371,7 +394,7 @@ namespace llvm { /// getOrCreateSubrange - Create a descriptor for a value range. This /// implicitly uniques the values returned. - DISubrange getOrCreateSubrange(int64_t Lo, int64_t Hi); + DISubrange getOrCreateSubrange(int64_t Lo, int64_t Count); /// createGlobalVariable - Create a new descriptor for the specified global. /// @param Name Name of the variable. @@ -385,6 +408,19 @@ namespace llvm { createGlobalVariable(StringRef Name, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, llvm::Value *Val); + /// \brief Create a new descriptor for the specified global. + /// @param Name Name of the variable. + /// @param LinkageName Mangled variable name. 
+ /// @param File File where this variable is defined. + /// @param LineNo Line number. + /// @param Ty Variable Type. + /// @param isLocalToUnit Boolean flag indicate whether this variable is + /// externally visible or not. + /// @param Val llvm::Value of the variable. + DIGlobalVariable + createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile File, + unsigned LineNo, DIType Ty, bool isLocalToUnit, + llvm::Value *Val); /// createStaticVariable - Create a new descriptor for the specified /// variable. @@ -397,10 +433,12 @@ namespace llvm { /// @param isLocalToUnit Boolean flag indicate whether this variable is /// externally visible or not. /// @param Val llvm::Value of the variable. + /// @param Decl Reference to the corresponding declaration. DIGlobalVariable createStaticVariable(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, - DIType Ty, bool isLocalToUnit, llvm::Value *Val); + DIType Ty, bool isLocalToUnit, llvm::Value *Val, + MDNode *Decl = NULL); /// createLocalVariable - Create a new descriptor for the specified diff --git a/include/llvm/DebugInfo.h b/include/llvm/DebugInfo.h index dae03ad10095..15f91870a574 100644 --- a/include/llvm/DebugInfo.h +++ b/include/llvm/DebugInfo.h @@ -14,11 +14,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_DEBUGINFO_H -#define LLVM_ANALYSIS_DEBUGINFO_H +#ifndef LLVM_DEBUGINFO_H +#define LLVM_DEBUGINFO_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Dwarf.h" @@ -61,7 +61,9 @@ namespace llvm { FlagExplicit = 1 << 7, FlagPrototyped = 1 << 8, FlagObjcClassComplete = 1 << 9, - FlagObjectPointer = 1 << 10 + FlagObjectPointer = 1 << 10, + FlagVector = 1 << 11, + FlagStaticMember = 1 << 12 }; protected: const MDNode *DbgNode; @@ -71,6 +73,7 @@ namespace llvm { return (unsigned)getUInt64Field(Elt); } 
uint64_t getUInt64Field(unsigned Elt) const; + int64_t getInt64Field(unsigned Elt) const; DIDescriptor getDescriptorField(unsigned Elt) const; template @@ -93,15 +96,11 @@ namespace llvm { explicit DIDescriptor(const DIVariable F); explicit DIDescriptor(const DIType F); - bool Verify() const { return DbgNode != 0; } + bool Verify() const; operator MDNode *() const { return const_cast(DbgNode); } MDNode *operator ->() const { return const_cast(DbgNode); } - unsigned getVersion() const { - return getUnsignedField(0) & LLVMDebugVersionMask; - } - unsigned getTag() const { return getUnsignedField(0) & ~LLVMDebugVersionMask; } @@ -141,8 +140,9 @@ namespace llvm { public: explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {} - uint64_t getLo() const { return getUInt64Field(1); } - uint64_t getHi() const { return getUInt64Field(2); } + int64_t getLo() const { return getInt64Field(1); } + int64_t getCount() const { return getInt64Field(2); } + bool Verify() const; }; /// DIArray - This descriptor holds an array of descriptors. @@ -169,6 +169,18 @@ namespace llvm { StringRef getDirectory() const; }; + /// DIFile - This is a wrapper for a file. + class DIFile : public DIScope { + friend class DIDescriptor; + public: + explicit DIFile(const MDNode *N = 0) : DIScope(N) { + if (DbgNode && !isFile()) + DbgNode = 0; + } + MDNode *getFileNode() const; + bool Verify() const; + }; + /// DICompileUnit - A wrapper for a compile unit. 
class DICompileUnit : public DIScope { friend class DIDescriptor; @@ -176,51 +188,24 @@ namespace llvm { public: explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {} - unsigned getLanguage() const { return getUnsignedField(2); } - StringRef getFilename() const { return getStringField(3); } - StringRef getDirectory() const { return getStringField(4); } - StringRef getProducer() const { return getStringField(5); } + unsigned getLanguage() const { return getUnsignedField(2); } + StringRef getProducer() const { return getStringField(3); } - /// isMain - Each input file is encoded as a separate compile unit in LLVM - /// debugging information output. However, many target specific tool chains - /// prefer to encode only one compile unit in an object file. In this - /// situation, the LLVM code generator will include debugging information - /// entities in the compile unit that is marked as main compile unit. The - /// code generator accepts maximum one main compile unit per module. If a - /// module does not contain any main compile unit then the code generator - /// will emit multiple compile units in the output object file. - - bool isMain() const { return getUnsignedField(6) != 0; } - bool isOptimized() const { return getUnsignedField(7) != 0; } - StringRef getFlags() const { return getStringField(8); } - unsigned getRunTimeVersion() const { return getUnsignedField(9); } + bool isOptimized() const { return getUnsignedField(4) != 0; } + StringRef getFlags() const { return getStringField(5); } + unsigned getRunTimeVersion() const { return getUnsignedField(6); } DIArray getEnumTypes() const; DIArray getRetainedTypes() const; DIArray getSubprograms() const; DIArray getGlobalVariables() const; + StringRef getSplitDebugFilename() const { return getStringField(11); } + /// Verify - Verify that a compile unit is well formed. bool Verify() const; }; - /// DIFile - This is a wrapper for a file. 
- class DIFile : public DIScope { - friend class DIDescriptor; - void printInternal(raw_ostream &OS) const {} // FIXME: Output something? - public: - explicit DIFile(const MDNode *N = 0) : DIScope(N) { - if (DbgNode && !isFile()) - DbgNode = 0; - } - StringRef getFilename() const { return getStringField(1); } - StringRef getDirectory() const { return getStringField(2); } - DICompileUnit getCompileUnit() const{ - assert (getVersion() <= LLVMDebugVersion10 && "Invalid CompileUnit!"); - return getFieldAs(3); - } - }; - /// DIEnumerator - A wrapper for an enumerator (e.g. X and Y in 'enum {X,Y}'). /// FIXME: it seems strange that this doesn't have either a reference to the /// type/precision or a file/line pair for location info. @@ -232,6 +217,7 @@ namespace llvm { StringRef getName() const { return getStringField(1); } uint64_t getEnumValue() const { return getUInt64Field(2); } + bool Verify() const; }; /// DIType - This is a wrapper for a type. @@ -250,16 +236,8 @@ namespace llvm { explicit DIType(const MDNode *N); explicit DIType() {} - DIScope getContext() const { return getFieldAs(1); } - StringRef getName() const { return getStringField(2); } - DICompileUnit getCompileUnit() const{ - assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!"); - if (getVersion() == llvm::LLVMDebugVersion7) - return getFieldAs(3); - - return getFieldAs(3).getCompileUnit(); - } - DIFile getFile() const { return getFieldAs(3); } + DIScope getContext() const { return getFieldAs(2); } + StringRef getName() const { return getStringField(3); } unsigned getLineNumber() const { return getUnsignedField(4); } uint64_t getSizeInBits() const { return getUInt64Field(5); } uint64_t getAlignInBits() const { return getUInt64Field(6); } @@ -295,21 +273,15 @@ namespace llvm { bool isObjcClassComplete() const { return (getFlags() & FlagObjcClassComplete) != 0; } + bool isVector() const { + return (getFlags() & FlagVector) != 0; + } + bool isStaticMember() const { + return (getFlags() & 
FlagStaticMember) != 0; + } bool isValid() const { return DbgNode && (isBasicType() || isDerivedType() || isCompositeType()); } - StringRef getDirectory() const { - if (getVersion() == llvm::LLVMDebugVersion7) - return getCompileUnit().getDirectory(); - - return getFieldAs(3).getDirectory(); - } - StringRef getFilename() const { - if (getVersion() == llvm::LLVMDebugVersion7) - return getCompileUnit().getFilename(); - - return getFieldAs(3).getFilename(); - } /// isUnsignedDIType - Return true if type encoding is unsigned. bool isUnsignedDIType(); @@ -332,7 +304,8 @@ namespace llvm { }; /// DIDerivedType - A simple derived type, like a const qualified type, - /// a typedef, a pointer or reference, etc. + /// a typedef, a pointer or reference, et cetera. Or, a data member of + /// a class/struct/union. class DIDerivedType : public DIType { friend class DIDescriptor; void printInternal(raw_ostream &OS) const; @@ -349,46 +322,18 @@ namespace llvm { /// return base type size. uint64_t getOriginalTypeSize() const; - /// getObjCProperty - Return property node, if this ivar is + /// getObjCProperty - Return property node, if this ivar is /// associated with one. 
MDNode *getObjCProperty() const; - StringRef getObjCPropertyName() const { - if (getVersion() > LLVMDebugVersion11) - return StringRef(); - return getStringField(10); + DIType getClassType() const { + assert(getTag() == dwarf::DW_TAG_ptr_to_member_type); + return getFieldAs(10); } - StringRef getObjCPropertyGetterName() const { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return getStringField(11); - } - StringRef getObjCPropertySetterName() const { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return getStringField(12); - } - bool isReadOnlyObjCProperty() { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readonly) != 0; - } - bool isReadWriteObjCProperty() { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0; - } - bool isAssignObjCProperty() { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_assign) != 0; - } - bool isRetainObjCProperty() { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_retain) != 0; - } - bool isCopyObjCProperty() { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_copy) != 0; - } - bool isNonAtomicObjCProperty() { - assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); - return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0; + + Constant *getConstant() const { + assert((getTag() == dwarf::DW_TAG_member) && isStaticMember()); + return getConstantField(10); } /// Verify - Verify that a derived type descriptor is well formed. 
@@ -409,10 +354,12 @@ namespace llvm { } DIArray getTypeArray() const { return getFieldAs(10); } + void setTypeArray(DIArray Elements, DIArray TParams = DIArray()); unsigned getRunTimeLang() const { return getUnsignedField(11); } DICompositeType getContainingType() const { return getFieldAs(12); } + void setContainingType(DICompositeType ContainingType); DIArray getTemplateParams() const { return getFieldAs(13); } /// Verify - Verify that a composite type descriptor is well formed. @@ -427,14 +374,15 @@ namespace llvm { DIScope getContext() const { return getFieldAs(1); } StringRef getName() const { return getStringField(2); } DIType getType() const { return getFieldAs(3); } - StringRef getFilename() const { + StringRef getFilename() const { return getFieldAs(4).getFilename(); } - StringRef getDirectory() const { + StringRef getDirectory() const { return getFieldAs(4).getDirectory(); } unsigned getLineNumber() const { return getUnsignedField(5); } unsigned getColumnNumber() const { return getUnsignedField(6); } + bool Verify() const; }; /// DITemplateValueParameter - This is a wrapper for template value parameter. @@ -446,14 +394,15 @@ namespace llvm { StringRef getName() const { return getStringField(2); } DIType getType() const { return getFieldAs(3); } uint64_t getValue() const { return getUInt64Field(4); } - StringRef getFilename() const { + StringRef getFilename() const { return getFieldAs(5).getFilename(); } - StringRef getDirectory() const { + StringRef getDirectory() const { return getFieldAs(5).getDirectory(); } unsigned getLineNumber() const { return getUnsignedField(6); } unsigned getColumnNumber() const { return getUnsignedField(7); } + bool Verify() const; }; /// DISubprogram - This is a wrapper for a subprogram (e.g. a function). 
@@ -467,93 +416,66 @@ namespace llvm { StringRef getName() const { return getStringField(3); } StringRef getDisplayName() const { return getStringField(4); } StringRef getLinkageName() const { return getStringField(5); } - DICompileUnit getCompileUnit() const{ - assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!"); - if (getVersion() == llvm::LLVMDebugVersion7) - return getFieldAs(6); - - return getFieldAs(6).getCompileUnit(); - } - unsigned getLineNumber() const { return getUnsignedField(7); } - DICompositeType getType() const { return getFieldAs(8); } + unsigned getLineNumber() const { return getUnsignedField(6); } + DICompositeType getType() const { return getFieldAs(7); } /// getReturnTypeName - Subprogram return types are encoded either as /// DIType or as DICompositeType. StringRef getReturnTypeName() const { - DICompositeType DCT(getFieldAs(8)); + DICompositeType DCT(getFieldAs(7)); if (DCT.Verify()) { DIArray A = DCT.getTypeArray(); DIType T(A.getElement(0)); return T.getName(); } - DIType T(getFieldAs(8)); + DIType T(getFieldAs(7)); return T.getName(); } /// isLocalToUnit - Return true if this subprogram is local to the current /// compile unit, like 'static' in C. 
- unsigned isLocalToUnit() const { return getUnsignedField(9); } - unsigned isDefinition() const { return getUnsignedField(10); } + unsigned isLocalToUnit() const { return getUnsignedField(8); } + unsigned isDefinition() const { return getUnsignedField(9); } - unsigned getVirtuality() const { return getUnsignedField(11); } - unsigned getVirtualIndex() const { return getUnsignedField(12); } + unsigned getVirtuality() const { return getUnsignedField(10); } + unsigned getVirtualIndex() const { return getUnsignedField(11); } DICompositeType getContainingType() const { - return getFieldAs(13); + return getFieldAs(12); } - unsigned isArtificial() const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return getUnsignedField(14); - return (getUnsignedField(14) & FlagArtificial) != 0; + unsigned getFlags() const { + return getUnsignedField(13); + } + + unsigned isArtificial() const { + return (getUnsignedField(13) & FlagArtificial) != 0; } /// isPrivate - Return true if this subprogram has "private" /// access specifier. - bool isPrivate() const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return false; - return (getUnsignedField(14) & FlagPrivate) != 0; + bool isPrivate() const { + return (getUnsignedField(13) & FlagPrivate) != 0; } /// isProtected - Return true if this subprogram has "protected" /// access specifier. - bool isProtected() const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return false; - return (getUnsignedField(14) & FlagProtected) != 0; + bool isProtected() const { + return (getUnsignedField(13) & FlagProtected) != 0; } /// isExplicit - Return true if this subprogram is marked as explicit. - bool isExplicit() const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return false; - return (getUnsignedField(14) & FlagExplicit) != 0; + bool isExplicit() const { + return (getUnsignedField(13) & FlagExplicit) != 0; } /// isPrototyped - Return true if this subprogram is prototyped. 
- bool isPrototyped() const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return false; - return (getUnsignedField(14) & FlagPrototyped) != 0; + bool isPrototyped() const { + return (getUnsignedField(13) & FlagPrototyped) != 0; } unsigned isOptimized() const; - StringRef getFilename() const { - if (getVersion() == llvm::LLVMDebugVersion7) - return getCompileUnit().getFilename(); - - return getFieldAs(6).getFilename(); - } - - StringRef getDirectory() const { - if (getVersion() == llvm::LLVMDebugVersion7) - return getCompileUnit().getFilename(); - - return getFieldAs(6).getDirectory(); - } - /// getScopeLineNumber - Get the beginning of the scope of the /// function, not necessarily where the name of the program /// starts. - unsigned getScopeLineNumber() const { return getUnsignedField(20); } + unsigned getScopeLineNumber() const { return getUnsignedField(19); } /// Verify - Verify that a subprogram descriptor is well formed. bool Verify() const; @@ -562,11 +484,11 @@ namespace llvm { /// information for the function F. 
bool describes(const Function *F); - Function *getFunction() const { return getFunctionField(16); } - void replaceFunction(Function *F) { replaceFunctionField(16, F); } - DIArray getTemplateParams() const { return getFieldAs(17); } + Function *getFunction() const { return getFunctionField(15); } + void replaceFunction(Function *F) { replaceFunctionField(15, F); } + DIArray getTemplateParams() const { return getFieldAs(16); } DISubprogram getFunctionDeclaration() const { - return getFieldAs(18); + return getFieldAs(17); } MDNode *getVariablesNodes() const; DIArray getVariables() const; @@ -583,25 +505,13 @@ namespace llvm { StringRef getName() const { return getStringField(3); } StringRef getDisplayName() const { return getStringField(4); } StringRef getLinkageName() const { return getStringField(5); } - DICompileUnit getCompileUnit() const{ - assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!"); - if (getVersion() == llvm::LLVMDebugVersion7) - return getFieldAs(6); - - DIFile F = getFieldAs(6); - return F.getCompileUnit(); - } StringRef getFilename() const { - if (getVersion() <= llvm::LLVMDebugVersion10) - return getContext().getFilename(); return getFieldAs(6).getFilename(); - } + } StringRef getDirectory() const { - if (getVersion() <= llvm::LLVMDebugVersion10) - return getContext().getDirectory(); return getFieldAs(6).getDirectory(); - } + } unsigned getLineNumber() const { return getUnsignedField(7); } DIType getType() const { return getFieldAs(8); } @@ -610,6 +520,9 @@ namespace llvm { GlobalVariable *getGlobal() const { return getGlobalVariableField(11); } Constant *getConstant() const { return getConstantField(11); } + DIDerivedType getStaticDataMemberDeclaration() const { + return getFieldAs(12); + } /// Verify - Verify that a global variable descriptor is well formed. 
bool Verify() const; @@ -626,27 +539,18 @@ namespace llvm { DIScope getContext() const { return getFieldAs(1); } StringRef getName() const { return getStringField(2); } - DICompileUnit getCompileUnit() const { - assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!"); - if (getVersion() == llvm::LLVMDebugVersion7) - return getFieldAs(3); - - DIFile F = getFieldAs(3); - return F.getCompileUnit(); - } - unsigned getLineNumber() const { - return (getUnsignedField(4) << 8) >> 8; + DIFile getFile() const { return getFieldAs(3); } + unsigned getLineNumber() const { + return (getUnsignedField(4) << 8) >> 8; } unsigned getArgNumber() const { - unsigned L = getUnsignedField(4); + unsigned L = getUnsignedField(4); return L >> 24; } DIType getType() const { return getFieldAs(5); } - + /// isArtificial - Return true if this variable is marked as "artificial". - bool isArtificial() const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return false; + bool isArtificial() const { return (getUnsignedField(6) & FlagArtificial) != 0; } @@ -666,12 +570,8 @@ namespace llvm { } unsigned getNumAddrElements() const; - + uint64_t getAddrElement(unsigned Idx) const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return getUInt64Field(Idx+6); - if (getVersion() == llvm::LLVMDebugVersion9) - return getUInt64Field(Idx+7); return getUInt64Field(Idx+8); } @@ -681,7 +581,7 @@ namespace llvm { return getType().isBlockByrefStruct(); } - /// isInlinedFnArgument - Return trule if this variable provides debugging + /// isInlinedFnArgument - Return true if this variable provides debugging /// information for an inlined function arguments. 
bool isInlinedFnArgument(const Function *CurFn); @@ -692,17 +592,10 @@ namespace llvm { class DILexicalBlock : public DIScope { public: explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {} - DIScope getContext() const { return getFieldAs(1); } - unsigned getLineNumber() const { return getUnsignedField(2); } - unsigned getColumnNumber() const { return getUnsignedField(3); } - StringRef getDirectory() const { - StringRef dir = getFieldAs(4).getDirectory(); - return !dir.empty() ? dir : getContext().getDirectory(); - } - StringRef getFilename() const { - StringRef filename = getFieldAs(4).getFilename(); - return !filename.empty() ? filename : getContext().getFilename(); - } + DIScope getContext() const { return getFieldAs(2); } + unsigned getLineNumber() const { return getUnsignedField(3); } + unsigned getColumnNumber() const { return getUnsignedField(4); } + bool Verify() const; }; /// DILexicalBlockFile - This is a wrapper for a lexical block with @@ -710,40 +603,21 @@ namespace llvm { class DILexicalBlockFile : public DIScope { public: explicit DILexicalBlockFile(const MDNode *N = 0) : DIScope(N) {} - DIScope getContext() const { return getScope().getContext(); } + DIScope getContext() const { if (getScope().isSubprogram()) return getScope(); return getScope().getContext(); } unsigned getLineNumber() const { return getScope().getLineNumber(); } unsigned getColumnNumber() const { return getScope().getColumnNumber(); } - StringRef getDirectory() const { - StringRef dir = getFieldAs(2).getDirectory(); - return !dir.empty() ? dir : getContext().getDirectory(); - } - StringRef getFilename() const { - StringRef filename = getFieldAs(2).getFilename(); - assert(!filename.empty() && "Why'd you create this then?"); - return filename; - } - DILexicalBlock getScope() const { return getFieldAs(1); } + DILexicalBlock getScope() const { return getFieldAs(2); } + bool Verify() const; }; /// DINameSpace - A wrapper for a C++ style name space. 
- class DINameSpace : public DIScope { + class DINameSpace : public DIScope { + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {} - DIScope getContext() const { return getFieldAs(1); } - StringRef getName() const { return getStringField(2); } - StringRef getDirectory() const { - return getFieldAs(3).getDirectory(); - } - StringRef getFilename() const { - return getFieldAs(3).getFilename(); - } - DICompileUnit getCompileUnit() const{ - assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!"); - if (getVersion() == llvm::LLVMDebugVersion7) - return getFieldAs(3); - - return getFieldAs(3).getCompileUnit(); - } + DIScope getContext() const { return getFieldAs(2); } + StringRef getName() const { return getStringField(3); } unsigned getLineNumber() const { return getUnsignedField(4); } bool Verify() const; }; @@ -818,7 +692,7 @@ namespace llvm { /// to hold function specific information. NamedMDNode *getOrInsertFnSpecificMDNode(Module &M, DISubprogram SP); - /// getFnSpecificMDNode - Return a NameMDNode, if available, that is + /// getFnSpecificMDNode - Return a NameMDNode, if available, that is /// suitable to hold function specific information. NamedMDNode *getFnSpecificMDNode(const Module &M, DISubprogram SP); @@ -836,7 +710,7 @@ namespace llvm { public: /// processModule - Process entire module and collect debug info /// anchors. - void processModule(Module &M); + void processModule(const Module &M); private: /// processType - Process DIType. @@ -849,7 +723,7 @@ namespace llvm { void processSubprogram(DISubprogram SP); /// processDeclare - Process DbgDeclareInst. - void processDeclare(DbgDeclareInst *DDI); + void processDeclare(const DbgDeclareInst *DDI); /// processLocation - Process DILocation. 
void processLocation(DILocation Loc); diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index 26bd1f627526..8fcd9e0b8246 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -16,9 +16,11 @@ #define LLVM_DEBUGINFO_DICONTEXT_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/RelocVisitor.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -56,6 +58,8 @@ public: } }; +typedef SmallVector, 16> DILineInfoTable; + /// DIInliningInfo - a format-neutral container for inlined code description. class DIInliningInfo { SmallVector Frames; @@ -90,6 +94,24 @@ public: } }; +/// Selects which debug sections get dumped. +enum DIDumpType { + DIDT_Null, + DIDT_All, + DIDT_Abbrev, + DIDT_AbbrevDwo, + DIDT_Aranges, + DIDT_Frames, + DIDT_Info, + DIDT_InfoDwo, + DIDT_Line, + DIDT_Ranges, + DIDT_Pubnames, + DIDT_Str, + DIDT_StrDwo, + DIDT_StrOffsetsDwo +}; + // In place of applying the relocations to the data we've read from disk we use // a separate mapping table to the side and checking that at locations in the // dwarf where we expect relocated values. This adds a bit of complexity to the @@ -102,19 +124,14 @@ public: virtual ~DIContext(); /// getDWARFContext - get a context for binary DWARF data. 
- static DIContext *getDWARFContext(bool isLittleEndian, - StringRef infoSection, - StringRef abbrevSection, - StringRef aRangeSection = StringRef(), - StringRef lineSection = StringRef(), - StringRef stringSection = StringRef(), - StringRef rangeSection = StringRef(), - const RelocAddrMap &Map = RelocAddrMap()); + static DIContext *getDWARFContext(object::ObjectFile *); - virtual void dump(raw_ostream &OS) = 0; + virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All) = 0; virtual DILineInfo getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; + virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address, + uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; }; diff --git a/include/llvm/DefaultPasses.h b/include/llvm/DefaultPasses.h deleted file mode 100644 index 9f1ade86aba6..000000000000 --- a/include/llvm/DefaultPasses.h +++ /dev/null @@ -1,168 +0,0 @@ -//===- llvm/DefaultPasses.h - Default Pass Support code --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This file defines the infrastructure for registering the standard pass list. -// This defines sets of standard optimizations that plugins can modify and -// front ends can use. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEFAULT_PASS_SUPPORT_H -#define LLVM_DEFAULT_PASS_SUPPORT_H - -#include "llvm/PassSupport.h" - -namespace llvm { - -class PassManagerBase; - -/// Unique identifiers for the default standard passes. The addresses of -/// these symbols are used to uniquely identify passes from the default list. 
-namespace DefaultStandardPasses { -extern unsigned char AggressiveDCEID; -extern unsigned char ArgumentPromotionID; -extern unsigned char BasicAliasAnalysisID; -extern unsigned char CFGSimplificationID; -extern unsigned char ConstantMergeID; -extern unsigned char CorrelatedValuePropagationID; -extern unsigned char DeadArgEliminationID; -extern unsigned char DeadStoreEliminationID; -extern unsigned char EarlyCSEID; -extern unsigned char FunctionAttrsID; -extern unsigned char FunctionInliningID; -extern unsigned char GVNID; -extern unsigned char GlobalDCEID; -extern unsigned char GlobalOptimizerID; -extern unsigned char GlobalsModRefID; -extern unsigned char IPSCCPID; -extern unsigned char IndVarSimplifyID; -extern unsigned char InlinerPlaceholderID; -extern unsigned char InstructionCombiningID; -extern unsigned char JumpThreadingID; -extern unsigned char LICMID; -extern unsigned char LoopDeletionID; -extern unsigned char LoopIdiomID; -extern unsigned char LoopRotateID; -extern unsigned char LoopUnrollID; -extern unsigned char LoopUnswitchID; -extern unsigned char MemCpyOptID; -extern unsigned char PruneEHID; -extern unsigned char ReassociateID; -extern unsigned char SCCPID; -extern unsigned char ScalarReplAggregatesID; -extern unsigned char SimplifyLibCallsID; -extern unsigned char StripDeadPrototypesID; -extern unsigned char TailCallEliminationID; -extern unsigned char TypeBasedAliasAnalysisID; -} - -/// StandardPass - The class responsible for maintaining the lists of standard -class StandardPass { - friend class RegisterStandardPassLists; - public: - /// Predefined standard sets of passes - enum StandardSet { - AliasAnalysis, - Function, - Module, - LTO - }; - /// Flags to specify whether a pass should be enabled. Passes registered - /// with the standard sets may specify a minimum optimization level and one - /// or more flags that must be set when constructing the set for the pass to - /// be used. 
- enum OptimizationFlags { - /// Optimize for size was requested. - OptimizeSize = 1<<0, - /// Allow passes which may make global module changes. - UnitAtATime = 1<<1, - /// UnrollLoops - Allow loop unrolling. - UnrollLoops = 1<<2, - /// Allow library calls to be simplified. - SimplifyLibCalls = 1<<3, - /// Whether the module may have code using exceptions. - HaveExceptions = 1<<4, - // Run an inliner pass as part of this set. - RunInliner = 1<<5 - }; - enum OptimizationFlagComponents { - /// The low bits are used to store the optimization level. When requesting - /// passes, this should store the requested optimisation level. When - /// setting passes, this should set the minimum optimization level at which - /// the pass will run. - OptimizationLevelMask=0xf, - /// The maximum optimisation level at which the pass is run. - MaxOptimizationLevelMask=0xf0, - // Flags that must be set - RequiredFlagMask=0xff00, - // Flags that may not be set. - DisallowedFlagMask=0xff0000, - MaxOptimizationLevelShift=4, - RequiredFlagShift=8, - DisallowedFlagShift=16 - }; - /// Returns the optimisation level from a set of flags. - static unsigned OptimizationLevel(unsigned flags) { - return flags & OptimizationLevelMask; - } - /// Returns the maximum optimization level for this set of flags - static unsigned MaxOptimizationLevel(unsigned flags) { - return (flags & MaxOptimizationLevelMask) >> 4; - } - /// Constructs a set of flags from the specified minimum and maximum - /// optimisation level - static unsigned OptimzationFlags(unsigned minLevel=0, unsigned maxLevel=0xf, - unsigned requiredFlags=0, unsigned disallowedFlags=0) { - return ((minLevel & OptimizationLevelMask) | - ((maxLevel<> RequiredFlagShift; - } - /// Returns the flags that must not be set for this to match - static unsigned DisallowedFlags(unsigned flags) { - return (flags & DisallowedFlagMask) >> DisallowedFlagShift; - } - /// Register a standard pass in the specified set. 
If flags is non-zero, - /// then the pass will only be returned when the specified flags are set. - template - class RegisterStandardPass { - public: - RegisterStandardPass(StandardSet set, unsigned char *runBefore=0, - unsigned flags=0, unsigned char *ID=0) { - // Use the pass's ID if one is not specified - RegisterDefaultPass(PassInfo::NormalCtor_t(callDefaultCtor), - ID ? ID : (unsigned char*)&passName::ID, runBefore, set, flags); - } - }; - /// Adds the passes from the specified set to the provided pass manager - static void AddPassesFromSet(PassManagerBase *PM, - StandardSet set, - unsigned flags=0, - bool VerifyEach=false, - Pass *inliner=0); - private: - /// Registers the default passes. This is set by RegisterStandardPassLists - /// and is called lazily. - static void (*RegisterDefaultPasses)(void); - /// Creates the verifier pass that is inserted when a VerifyEach is passed to - /// AddPassesFromSet() - static Pass* (*CreateVerifierPass)(void); - /// Registers the pass - static void RegisterDefaultPass(PassInfo::NormalCtor_t constructor, - unsigned char *newPass, - unsigned char *oldPass, - StandardSet set, - unsigned flags=0); -}; - -} // namespace llvm - -#endif diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index 8073d8f92c51..3fd69e266b47 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -12,22 +12,22 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTION_ENGINE_H -#define LLVM_EXECUTION_ENGINE_H +#ifndef LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H +#define LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H -#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ValueMap.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Support/ErrorHandling.h" -#include 
"llvm/Support/ValueHandle.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include #include #include +#include namespace llvm { diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h index a2fed98c150e..0e92f79eba8f 100644 --- a/include/llvm/ExecutionEngine/GenericValue.h +++ b/include/llvm/ExecutionEngine/GenericValue.h @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// -#ifndef GENERIC_VALUE_H -#define GENERIC_VALUE_H +#ifndef LLVM_EXECUTIONENGINE_GENERICVALUE_H +#define LLVM_EXECUTIONENGINE_GENERICVALUE_H #include "llvm/ADT/APInt.h" #include "llvm/Support/DataTypes.h" @@ -24,21 +24,30 @@ typedef void* PointerTy; class APInt; struct GenericValue { + struct IntPair { + unsigned int first; + unsigned int second; + }; union { double DoubleVal; float FloatVal; PointerTy PointerVal; - struct { unsigned int first; unsigned int second; } UIntPairVal; + struct IntPair UIntPairVal; unsigned char Untyped[8]; }; - APInt IntVal; // also used for long doubles + APInt IntVal; // also used for long doubles. + // For aggregate data types. + std::vector AggregateVal; - GenericValue() : DoubleVal(0.0), IntVal(1,0) {} + // to make code faster, set GenericValue to zero could be omitted, but it is + // potentially can cause problems, since GenericValue to store garbage + // instead of zero. + GenericValue() : IntVal(1,0) {UIntPairVal.first = 0; UIntPairVal.second = 0;} explicit GenericValue(void *V) : PointerVal(V), IntVal(1,0) { } }; inline GenericValue PTOGV(void *P) { return GenericValue(P); } inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; } -} // End llvm namespace +} // End llvm namespace. 
#endif diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h index 72d97ef8e12b..f49d0c487fe9 100644 --- a/include/llvm/ExecutionEngine/Interpreter.h +++ b/include/llvm/ExecutionEngine/Interpreter.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef EXECUTION_ENGINE_INTERPRETER_H -#define EXECUTION_ENGINE_INTERPRETER_H +#ifndef LLVM_EXECUTIONENGINE_INTERPRETER_H +#define LLVM_EXECUTIONENGINE_INTERPRETER_H #include "llvm/ExecutionEngine/ExecutionEngine.h" #include diff --git a/include/llvm/ExecutionEngine/JIT.h b/include/llvm/ExecutionEngine/JIT.h index b4cda1d513f1..581d6e6c35eb 100644 --- a/include/llvm/ExecutionEngine/JIT.h +++ b/include/llvm/ExecutionEngine/JIT.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTION_ENGINE_JIT_H -#define LLVM_EXECUTION_ENGINE_JIT_H +#ifndef LLVM_EXECUTIONENGINE_JIT_H +#define LLVM_EXECUTIONENGINE_JIT_H #include "llvm/ExecutionEngine/ExecutionEngine.h" #include diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h index e6586e778c19..ed66102d4696 100644 --- a/include/llvm/ExecutionEngine/JITEventListener.h +++ b/include/llvm/ExecutionEngine/JITEventListener.h @@ -12,13 +12,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H -#define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H +#ifndef LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H +#define LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H -#include "llvm/Config/config.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/DebugLoc.h" - #include namespace llvm { @@ -128,4 +127,4 @@ public: } // end namespace llvm. 
-#endif // defined LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H +#endif // defined LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h index 90896465018c..714a98055a42 100644 --- a/include/llvm/ExecutionEngine/JITMemoryManager.h +++ b/include/llvm/ExecutionEngine/JITMemoryManager.h @@ -7,12 +7,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H -#define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H +#ifndef LLVM_EXECUTIONENGINE_JITMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_JITMEMORYMANAGER_H #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Support/DataTypes.h" - #include namespace llvm { diff --git a/include/llvm/ExecutionEngine/MCJIT.h b/include/llvm/ExecutionEngine/MCJIT.h index ac16bdc7df17..66ddb7cdb875 100644 --- a/include/llvm/ExecutionEngine/MCJIT.h +++ b/include/llvm/ExecutionEngine/MCJIT.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTION_ENGINE_MCJIT_H -#define LLVM_EXECUTION_ENGINE_MCJIT_H +#ifndef LLVM_EXECUTIONENGINE_MCJIT_H +#define LLVM_EXECUTIONENGINE_MCJIT_H #include "llvm/ExecutionEngine/ExecutionEngine.h" #include diff --git a/include/llvm/ExecutionEngine/OProfileWrapper.h b/include/llvm/ExecutionEngine/OProfileWrapper.h index ab7f25e9d03d..05da594a94a8 100644 --- a/include/llvm/ExecutionEngine/OProfileWrapper.h +++ b/include/llvm/ExecutionEngine/OProfileWrapper.h @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef OPROFILE_WRAPPER_H -#define OPROFILE_WRAPPER_H +#ifndef LLVM_EXECUTIONENGINE_OPROFILEWRAPPER_H +#define LLVM_EXECUTIONENGINE_OPROFILEWRAPPER_H #include "llvm/Support/DataTypes.h" #include @@ -41,10 +41,10 @@ class OProfileWrapper { typedef int (*op_unload_native_code_ptr_t)(op_agent_t, uint64_t); // Also used for 
op_minor_version function which has the same signature - typedef int (*op_major_version_ptr_t)(void); + typedef int (*op_major_version_ptr_t)(); // This is not a part of the opagent API, but is useful nonetheless - typedef bool (*IsOProfileRunningPtrT)(void); + typedef bool (*IsOProfileRunningPtrT)(); op_agent_t Agent; @@ -99,8 +99,8 @@ public: size_t num_entries, struct debug_line_info const* info); int op_unload_native_code(uint64_t addr); - int op_major_version(void); - int op_minor_version(void); + int op_major_version(); + int op_minor_version(); // Returns true if the oprofiled process is running, the opagent library is // loaded and a connection to the agent has been established, and false @@ -121,4 +121,4 @@ private: } // namespace llvm -#endif //OPROFILE_WRAPPER_H +#endif // LLVM_EXECUTIONENGINE_OPROFILEWRAPPER_H diff --git a/include/llvm/ExecutionEngine/ObjectBuffer.h b/include/llvm/ExecutionEngine/ObjectBuffer.h index a0a77b8ba888..96a48b28b847 100644 --- a/include/llvm/ExecutionEngine/ObjectBuffer.h +++ b/include/llvm/ExecutionEngine/ObjectBuffer.h @@ -1,80 +1,80 @@ -//===---- ObjectBuffer.h - Utility class to wrap object image memory -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares a wrapper class to hold the memory into which an -// object will be generated. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_OBJECTBUFFER_H -#define LLVM_EXECUTIONENGINE_OBJECTBUFFER_H - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/MemoryBuffer.h" - -namespace llvm { - -/// ObjectBuffer - This class acts as a container for the memory buffer used during -/// generation and loading of executable objects using MCJIT and RuntimeDyld. The -/// underlying memory for the object will be owned by the ObjectBuffer instance -/// throughout its lifetime. The getMemBuffer() method provides a way to create a -/// MemoryBuffer wrapper object instance to be owned by other classes (such as -/// ObjectFile) as needed, but the MemoryBuffer instance returned does not own the -/// actual memory it points to. -class ObjectBuffer { -public: - ObjectBuffer() {} - ObjectBuffer(MemoryBuffer* Buf) : Buffer(Buf) {} - virtual ~ObjectBuffer() {} - - /// getMemBuffer - Like MemoryBuffer::getMemBuffer() this function - /// returns a pointer to an object that is owned by the caller. However, - /// the caller does not take ownership of the underlying memory. - MemoryBuffer *getMemBuffer() const { - return MemoryBuffer::getMemBuffer(Buffer->getBuffer(), "", false); - } - - const char *getBufferStart() const { return Buffer->getBufferStart(); } - size_t getBufferSize() const { return Buffer->getBufferSize(); } - -protected: - // The memory contained in an ObjectBuffer - OwningPtr Buffer; -}; - -/// ObjectBufferStream - This class encapsulates the SmallVector and -/// raw_svector_ostream needed to generate an object using MC code emission -/// while providing a common ObjectBuffer interface for access to the -/// memory once the object has been generated. 
-class ObjectBufferStream : public ObjectBuffer { -public: - ObjectBufferStream() : OS(SV) {} - virtual ~ObjectBufferStream() {} - - raw_ostream &getOStream() { return OS; } - void flush() - { - OS.flush(); - - // Make the data accessible via the ObjectBuffer::Buffer - Buffer.reset(MemoryBuffer::getMemBuffer(StringRef(SV.data(), SV.size()), - "", - false)); - } - -protected: - SmallVector SV; // Working buffer into which we JIT. - raw_svector_ostream OS; // streaming wrapper -}; - -} // namespace llvm - -#endif +//===---- ObjectBuffer.h - Utility class to wrap object image memory -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares a wrapper class to hold the memory into which an +// object will be generated. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_OBJECTBUFFER_H +#define LLVM_EXECUTIONENGINE_OBJECTBUFFER_H + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// ObjectBuffer - This class acts as a container for the memory buffer used during +/// generation and loading of executable objects using MCJIT and RuntimeDyld. The +/// underlying memory for the object will be owned by the ObjectBuffer instance +/// throughout its lifetime. The getMemBuffer() method provides a way to create a +/// MemoryBuffer wrapper object instance to be owned by other classes (such as +/// ObjectFile) as needed, but the MemoryBuffer instance returned does not own the +/// actual memory it points to. 
+class ObjectBuffer { +public: + ObjectBuffer() {} + ObjectBuffer(MemoryBuffer* Buf) : Buffer(Buf) {} + virtual ~ObjectBuffer() {} + + /// getMemBuffer - Like MemoryBuffer::getMemBuffer() this function + /// returns a pointer to an object that is owned by the caller. However, + /// the caller does not take ownership of the underlying memory. + MemoryBuffer *getMemBuffer() const { + return MemoryBuffer::getMemBuffer(Buffer->getBuffer(), "", false); + } + + const char *getBufferStart() const { return Buffer->getBufferStart(); } + size_t getBufferSize() const { return Buffer->getBufferSize(); } + +protected: + // The memory contained in an ObjectBuffer + OwningPtr Buffer; +}; + +/// ObjectBufferStream - This class encapsulates the SmallVector and +/// raw_svector_ostream needed to generate an object using MC code emission +/// while providing a common ObjectBuffer interface for access to the +/// memory once the object has been generated. +class ObjectBufferStream : public ObjectBuffer { +public: + ObjectBufferStream() : OS(SV) {} + virtual ~ObjectBufferStream() {} + + raw_ostream &getOStream() { return OS; } + void flush() + { + OS.flush(); + + // Make the data accessible via the ObjectBuffer::Buffer + Buffer.reset(MemoryBuffer::getMemBuffer(StringRef(SV.data(), SV.size()), + "", + false)); + } + +protected: + SmallVector SV; // Working buffer into which we JIT. + raw_svector_ostream OS; // streaming wrapper +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/ExecutionEngine/ObjectImage.h b/include/llvm/ExecutionEngine/ObjectImage.h index 82549add62e8..9fddca7e33c8 100644 --- a/include/llvm/ExecutionEngine/ObjectImage.h +++ b/include/llvm/ExecutionEngine/ObjectImage.h @@ -1,61 +1,63 @@ -//===---- ObjectImage.h - Format independent executuable object image -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file declares a file format independent ObjectImage class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_OBJECTIMAGE_H -#define LLVM_EXECUTIONENGINE_OBJECTIMAGE_H - -#include "llvm/Object/ObjectFile.h" -#include "llvm/ExecutionEngine/ObjectBuffer.h" - -namespace llvm { - - -/// ObjectImage - A container class that represents an ObjectFile that has been -/// or is in the process of being loaded into memory for execution. -class ObjectImage { - ObjectImage() LLVM_DELETED_FUNCTION; - ObjectImage(const ObjectImage &other) LLVM_DELETED_FUNCTION; - -protected: - OwningPtr Buffer; - -public: - ObjectImage(ObjectBuffer *Input) : Buffer(Input) {} - virtual ~ObjectImage() {} - - virtual object::symbol_iterator begin_symbols() const = 0; - virtual object::symbol_iterator end_symbols() const = 0; - - virtual object::section_iterator begin_sections() const = 0; - virtual object::section_iterator end_sections() const = 0; - - virtual /* Triple::ArchType */ unsigned getArch() const = 0; - - // Subclasses can override these methods to update the image with loaded - // addresses for sections and common symbols - virtual void updateSectionAddress(const object::SectionRef &Sec, - uint64_t Addr) = 0; - virtual void updateSymbolAddress(const object::SymbolRef &Sym, - uint64_t Addr) = 0; - - virtual StringRef getData() const = 0; - - // Subclasses can override these methods to provide JIT debugging support - virtual void registerWithDebugger() = 0; - virtual void deregisterWithDebugger() = 0; -}; - -} // end namespace llvm - -#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H - +//===---- ObjectImage.h - Format independent executuable object image -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file declares a file format independent ObjectImage class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_OBJECTIMAGE_H +#define LLVM_EXECUTIONENGINE_OBJECTIMAGE_H + +#include "llvm/ExecutionEngine/ObjectBuffer.h" +#include "llvm/Object/ObjectFile.h" + +namespace llvm { + + +/// ObjectImage - A container class that represents an ObjectFile that has been +/// or is in the process of being loaded into memory for execution. +class ObjectImage { + ObjectImage() LLVM_DELETED_FUNCTION; + ObjectImage(const ObjectImage &other) LLVM_DELETED_FUNCTION; + +protected: + OwningPtr Buffer; + +public: + ObjectImage(ObjectBuffer *Input) : Buffer(Input) {} + virtual ~ObjectImage() {} + + virtual object::symbol_iterator begin_symbols() const = 0; + virtual object::symbol_iterator end_symbols() const = 0; + + virtual object::section_iterator begin_sections() const = 0; + virtual object::section_iterator end_sections() const = 0; + + virtual /* Triple::ArchType */ unsigned getArch() const = 0; + + // Subclasses can override these methods to update the image with loaded + // addresses for sections and common symbols + virtual void updateSectionAddress(const object::SectionRef &Sec, + uint64_t Addr) = 0; + virtual void updateSymbolAddress(const object::SymbolRef &Sym, + uint64_t Addr) = 0; + + virtual StringRef getData() const = 0; + + virtual object::ObjectFile* getObjectFile() const = 0; + + // Subclasses can override these methods to provide JIT debugging support + virtual void registerWithDebugger() = 0; + virtual void deregisterWithDebugger() = 0; +}; + +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_OBJECTIMAGE_H + diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index 891f534862f4..4222d5335bcc 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h 
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_RUNTIME_DYLD_H -#define LLVM_RUNTIME_DYLD_H +#ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H +#define LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" @@ -36,29 +36,36 @@ public: RTDyldMemoryManager() {} virtual ~RTDyldMemoryManager(); - /// allocateCodeSection - Allocate a memory block of (at least) the given - /// size suitable for executable code. The SectionID is a unique identifier - /// assigned by the JIT engine, and optionally recorded by the memory manager - /// to access a loaded section. + /// Allocate a memory block of (at least) the given size suitable for + /// executable code. The SectionID is a unique identifier assigned by the JIT + /// engine, and optionally recorded by the memory manager to access a loaded + /// section. virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) = 0; - /// allocateDataSection - Allocate a memory block of (at least) the given - /// size suitable for data. The SectionID is a unique identifier - /// assigned by the JIT engine, and optionally recorded by the memory manager - /// to access a loaded section. + /// Allocate a memory block of (at least) the given size suitable for data. + /// The SectionID is a unique identifier assigned by the JIT engine, and + /// optionally recorded by the memory manager to access a loaded section. virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) = 0; + unsigned SectionID, bool IsReadOnly) = 0; - /// getPointerToNamedFunction - This method returns the address of the - /// specified function. As such it is only useful for resolving library - /// symbols, not code generated symbols. + /// This method returns the address of the specified function. 
As such it is + /// only useful for resolving library symbols, not code generated symbols. /// /// If AbortOnFailure is false and no function with the given name is /// found, this function returns a null pointer. Otherwise, it prints a /// message to stderr and aborts. virtual void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true) = 0; + + /// This method is called when object loading is complete and section page + /// permissions can be applied. It is up to the memory manager implementation + /// to decide whether or not to act on this method. The memory manager will + /// typically allocate all sections as read-write and then apply specific + /// permissions when this method is called. + /// + /// Returns true if an error occurred, false otherwise. + virtual bool applyPermissions(std::string *ErrMsg = 0) = 0; }; class RuntimeDyld { @@ -77,10 +84,10 @@ public: RuntimeDyld(RTDyldMemoryManager *); ~RuntimeDyld(); - /// loadObject - prepare the object contained in the input buffer for - /// execution. Ownership of the input buffer is transferred to the - /// ObjectImage instance returned from this function if successful. - /// In the case of load failure, the input buffer will be deleted. + /// Prepare the object contained in the input buffer for execution. + /// Ownership of the input buffer is transferred to the ObjectImage + /// instance returned from this function if successful. In the case of load + /// failure, the input buffer will be deleted. ObjectImage *loadObject(ObjectBuffer *InputBuffer); /// Get the address of our local copy of the symbol. This may or may not @@ -95,7 +102,7 @@ public: /// Resolve the relocations for all symbols we currently know about. void resolveRelocations(); - /// mapSectionAddress - map a section to its target address space value. + /// Map a section to its target address space value. 
/// Map the address of a JIT section as returned from the memory manager /// to the address in the target process as the running code will see it. /// This is the address which will be used for relocation resolution. diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h new file mode 100644 index 000000000000..ae5004e130c0 --- /dev/null +++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h @@ -0,0 +1,176 @@ +//===- SectionMemoryManager.h - Memory manager for MCJIT/RtDyld -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of a section-based memory manager used by +// the MCJIT execution engine and RuntimeDyld. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" + +namespace llvm { + +/// This is a simple memory manager which implements the methods called by +/// the RuntimeDyld class to allocate memory for section-based loading of +/// objects, usually those generated by the MCJIT execution engine. +/// +/// This memory manager allocates all section memory as read-write. The +/// RuntimeDyld will copy JITed section memory into these allocated blocks +/// and perform any necessary linking and relocations. +/// +/// Any client using this memory manager MUST ensure that section-specific +/// page permissions have been applied before attempting to execute functions +/// in the JITed object. 
Permissions can be applied either by calling +/// MCJIT::finalizeObject or by calling SectionMemoryManager::applyPermissions +/// directly. Clients of MCJIT should call MCJIT::finalizeObject. +class SectionMemoryManager : public JITMemoryManager { + SectionMemoryManager(const SectionMemoryManager&) LLVM_DELETED_FUNCTION; + void operator=(const SectionMemoryManager&) LLVM_DELETED_FUNCTION; + +public: + SectionMemoryManager() { } + virtual ~SectionMemoryManager(); + + /// \brief Allocates a memory block of (at least) the given size suitable for + /// executable code. + /// + /// The value of \p Alignment must be a power of two. If \p Alignment is zero + /// a default alignment of 16 will be used. + virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID); + + /// \brief Allocates a memory block of (at least) the given size suitable for + /// executable code. + /// + /// The value of \p Alignment must be a power of two. If \p Alignment is zero + /// a default alignment of 16 will be used. + virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + bool isReadOnly); + + /// \brief Applies section-specific memory permissions. + /// + /// This method is called when object loading is complete and section page + /// permissions can be applied. It is up to the memory manager implementation + /// to decide whether or not to act on this method. The memory manager will + /// typically allocate all sections as read-write and then apply specific + /// permissions when this method is called. Code sections cannot be executed + /// until this function has been called. + /// + /// \returns true if an error occurred, false otherwise. + virtual bool applyPermissions(std::string *ErrMsg = 0); + + /// This method returns the address of the specified function. As such it is + /// only useful for resolving library symbols, not code generated symbols. 
+ /// + /// If \p AbortOnFailure is false and no function with the given name is + /// found, this function returns a null pointer. Otherwise, it prints a + /// message to stderr and aborts. + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); + + /// \brief Invalidate instruction cache for code sections. + /// + /// Some platforms with separate data cache and instruction cache require + /// explicit cache flush, otherwise JIT code manipulations (like resolved + /// relocations) will get to the data cache but not to the instruction cache. + /// + /// This method is not called by RuntimeDyld or MCJIT during the load + /// process. Clients may call this function when needed. See the lli + /// tool for example use. + virtual void invalidateInstructionCache(); + +private: + struct MemoryGroup { + SmallVector AllocatedMem; + SmallVector FreeMem; + sys::MemoryBlock Near; + }; + + uint8_t *allocateSection(MemoryGroup &MemGroup, uintptr_t Size, + unsigned Alignment); + + error_code applyMemoryGroupPermissions(MemoryGroup &MemGroup, + unsigned Permissions); + + MemoryGroup CodeMem; + MemoryGroup RWDataMem; + MemoryGroup RODataMem; + +public: + /// + /// Functions below are not used by MCJIT or RuntimeDyld, but must be + /// implemented because they are declared as pure virtuals in the base class. 
+ /// + + virtual void setMemoryWritable() { + llvm_unreachable("Unexpected call!"); + } + virtual void setMemoryExecutable() { + llvm_unreachable("Unexpected call!"); + } + virtual void setPoisonMemory(bool poison) { + llvm_unreachable("Unexpected call!"); + } + virtual void AllocateGOT() { + llvm_unreachable("Unexpected call!"); + } + virtual uint8_t *getGOTBase() const { + llvm_unreachable("Unexpected call!"); + return 0; + } + virtual uint8_t *startFunctionBody(const Function *F, + uintptr_t &ActualSize){ + llvm_unreachable("Unexpected call!"); + return 0; + } + virtual uint8_t *allocateStub(const GlobalValue *F, unsigned StubSize, + unsigned Alignment) { + llvm_unreachable("Unexpected call!"); + return 0; + } + virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart, + uint8_t *FunctionEnd) { + llvm_unreachable("Unexpected call!"); + } + virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) { + llvm_unreachable("Unexpected call!"); + return 0; + } + virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) { + llvm_unreachable("Unexpected call!"); + return 0; + } + virtual void deallocateFunctionBody(void *Body) { + llvm_unreachable("Unexpected call!"); + } + virtual uint8_t *startExceptionTable(const Function *F, + uintptr_t &ActualSize) { + llvm_unreachable("Unexpected call!"); + return 0; + } + virtual void endExceptionTable(const Function *F, uint8_t *TableStart, + uint8_t *TableEnd, uint8_t *FrameRegister) { + llvm_unreachable("Unexpected call!"); + } + virtual void deallocateExceptionTable(void *ET) { + llvm_unreachable("Unexpected call!"); + } +}; + +} + +#endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H + diff --git a/include/llvm/GVMaterializer.h b/include/llvm/GVMaterializer.h index c14355238867..1e5c4263d49b 100644 --- a/include/llvm/GVMaterializer.h +++ b/include/llvm/GVMaterializer.h @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#ifndef 
GVMATERIALIZER_H -#define GVMATERIALIZER_H +#ifndef LLVM_GVMATERIALIZER_H +#define LLVM_GVMATERIALIZER_H #include diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h new file mode 100644 index 000000000000..ef4e4fc7aa68 --- /dev/null +++ b/include/llvm/IR/Argument.h @@ -0,0 +1,96 @@ +//===-- llvm/Argument.h - Definition of the Argument class ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Argument class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_ARGUMENT_H +#define LLVM_IR_ARGUMENT_H + +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Value.h" + +namespace llvm { + +template + class SymbolTableListTraits; + +/// \brief LLVM Argument representation +/// +/// This class represents an incoming formal argument to a Function. A formal +/// argument, since it is ``formal'', does not contain an actual value but +/// instead represents the type, argument number, and attributes of an argument +/// for a specific function. When used in the body of said function, the +/// argument of course represents the value of the actual argument that the +/// function was called with. +class Argument : public Value, public ilist_node { + virtual void anchor(); + Function *Parent; + + friend class SymbolTableListTraits; + void setParent(Function *parent); + +public: + /// \brief Constructor. + /// + /// If \p F is specified, the argument is inserted at the end of the argument + /// list for \p F. 
+ explicit Argument(Type *Ty, const Twine &Name = "", Function *F = 0); + + inline const Function *getParent() const { return Parent; } + inline Function *getParent() { return Parent; } + + /// \brief Return the index of this formal argument in its containing + /// function. + /// + /// For example in "void foo(int a, float b)" a is 0 and b is 1. + unsigned getArgNo() const; + + /// \brief Return true if this argument has the byval attribute on it in its + /// containing function. + bool hasByValAttr() const; + + /// \brief If this is a byval argument, return its alignment. + unsigned getParamAlignment() const; + + /// \brief Return true if this argument has the nest attribute on it in its + /// containing function. + bool hasNestAttr() const; + + /// \brief Return true if this argument has the noalias attribute on it in its + /// containing function. + bool hasNoAliasAttr() const; + + /// \brief Return true if this argument has the nocapture attribute on it in + /// its containing function. + bool hasNoCaptureAttr() const; + + /// \brief Return true if this argument has the sret attribute on it in its + /// containing function. + bool hasStructRetAttr() const; + + /// \brief Add a Attribute to an argument. + void addAttr(AttributeSet AS); + + /// \brief Remove a Attribute from an argument. + void removeAttr(AttributeSet AS); + + /// \brief Method for support type inquiry through isa, cast, and + /// dyn_cast. + static inline bool classof(const Value *V) { + return V->getValueID() == ArgumentVal; + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h new file mode 100644 index 000000000000..074b38779ae8 --- /dev/null +++ b/include/llvm/IR/Attributes.h @@ -0,0 +1,499 @@ +//===-- llvm/Attributes.h - Container for Attributes ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the simple types necessary to represent the +/// attributes associated with functions and their calls. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_ATTRIBUTES_H +#define LLVM_IR_ATTRIBUTES_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/PointerLikeTypeTraits.h" +#include +#include +#include +#include + +namespace llvm { + +class AttrBuilder; +class AttributeImpl; +class AttributeSetImpl; +class AttributeSetNode; +class Constant; +template struct DenseMapInfo; +class LLVMContext; +class Type; + +//===----------------------------------------------------------------------===// +/// \class +/// \brief Functions, function parameters, and return types can have attributes +/// to indicate how they should be treated by optimizations and code +/// generation. This class represents one of those attributes. It's light-weight +/// and should be passed around by-value. +class Attribute { +public: + /// This enumeration lists the attributes that can be associated with + /// parameters, function results, or the function itself. + /// + /// Note: The `uwtable' attribute is about the ABI or the user mandating an + /// entry in the unwind table. The `nounwind' attribute is about an exception + /// passing by the function. + /// + /// In a theoretical system that uses tables for profiling and SjLj for + /// exceptions, they would be fully independent. In a normal system that uses + /// tables for both, the semantics are: + /// + /// nil = Needs an entry because an exception might pass by. + /// nounwind = No need for an entry + /// uwtable = Needs an entry because the ABI says so and because + /// an exception might pass by. + /// uwtable + nounwind = Needs an entry because the ABI says so. 
+ + enum AttrKind { + // IR-Level Attributes + None, ///< No attributes have been set + Alignment, ///< Alignment of parameter (5 bits) + ///< stored as log2 of alignment with +1 bias + ///< 0 means unaligned (different from align(1)) + AlwaysInline, ///< inline=always + ByVal, ///< Pass structure by value + InlineHint, ///< Source said inlining was desirable + InReg, ///< Force argument to be passed in register + MinSize, ///< Function must be optimized for size first + Naked, ///< Naked function + Nest, ///< Nested function static chain + NoAlias, ///< Considered to not alias after call + NoBuiltin, ///< Callee isn't recognized as a builtin + NoCapture, ///< Function creates no aliases of pointer + NoDuplicate, ///< Call cannot be duplicated + NoImplicitFloat, ///< Disable implicit floating point insts + NoInline, ///< inline=never + NonLazyBind, ///< Function is called early and/or + ///< often, so lazy binding isn't worthwhile + NoRedZone, ///< Disable redzone + NoReturn, ///< Mark the function as not returning + NoUnwind, ///< Function doesn't unwind stack + OptimizeForSize, ///< opt_size + ReadNone, ///< Function does not access memory + ReadOnly, ///< Function only reads from memory + ReturnsTwice, ///< Function can return twice + SExt, ///< Sign extended before/after call + StackAlignment, ///< Alignment of stack for function (3 bits) + ///< stored as log2 of alignment with +1 bias 0 + ///< means unaligned (different from + ///< alignstack=(1)) + StackProtect, ///< Stack protection. + StackProtectReq, ///< Stack protection required. + StackProtectStrong, ///< Strong Stack protection. + StructRet, ///< Hidden pointer to structure to return + SanitizeAddress, ///< AddressSanitizer is on. + SanitizeThread, ///< ThreadSanitizer is on. + SanitizeMemory, ///< MemorySanitizer is on. 
+ UWTable, ///< Function must be in a unwind table + ZExt, ///< Zero extended before/after call + + EndAttrKinds ///< Sentinal value useful for loops + }; +private: + AttributeImpl *pImpl; + Attribute(AttributeImpl *A) : pImpl(A) {} +public: + Attribute() : pImpl(0) {} + + //===--------------------------------------------------------------------===// + // Attribute Construction + //===--------------------------------------------------------------------===// + + /// \brief Return a uniquified Attribute object. + static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0); + static Attribute get(LLVMContext &Context, StringRef Kind, + StringRef Val = StringRef()); + + /// \brief Return a uniquified Attribute object that has the specific + /// alignment set. + static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align); + static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align); + + //===--------------------------------------------------------------------===// + // Attribute Accessors + //===--------------------------------------------------------------------===// + + /// \brief Return true if the attribute is an Attribute::AttrKind type. + bool isEnumAttribute() const; + + /// \brief Return true if the attribute is an alignment attribute. + bool isAlignAttribute() const; + + /// \brief Return true if the attribute is a string (target-dependent) + /// attribute. + bool isStringAttribute() const; + + /// \brief Return true if the attribute is present. + bool hasAttribute(AttrKind Val) const; + + /// \brief Return true if the target-dependent attribute is present. + bool hasAttribute(StringRef Val) const; + + /// \brief Return the attribute's kind as an enum (Attribute::AttrKind). This + /// requires the attribute to be an enum or alignment attribute. + Attribute::AttrKind getKindAsEnum() const; + + /// \brief Return the attribute's value as an integer. This requires that the + /// attribute be an alignment attribute. 
+ uint64_t getValueAsInt() const; + + /// \brief Return the attribute's kind as a string. This requires the + /// attribute to be a string attribute. + StringRef getKindAsString() const; + + /// \brief Return the attribute's value as a string. This requires the + /// attribute to be a string attribute. + StringRef getValueAsString() const; + + /// \brief Returns the alignment field of an attribute as a byte alignment + /// value. + unsigned getAlignment() const; + + /// \brief Returns the stack alignment field of an attribute as a byte + /// alignment value. + unsigned getStackAlignment() const; + + /// \brief The Attribute is converted to a string of equivalent mnemonic. This + /// is, presumably, for writing out the mnemonics for the assembly writer. + std::string getAsString(bool InAttrGrp = false) const; + + /// \brief Equality and non-equality operators. + bool operator==(Attribute A) const { return pImpl == A.pImpl; } + bool operator!=(Attribute A) const { return pImpl != A.pImpl; } + + /// \brief Less-than operator. Useful for sorting the attributes list. + bool operator<(Attribute A) const; + + void Profile(FoldingSetNodeID &ID) const { + ID.AddPointer(pImpl); + } +}; + +//===----------------------------------------------------------------------===// +/// \class +/// \brief This class holds the attributes for a function, its return value, and +/// its parameters. You access the attributes for each of them via an index into +/// the AttributeSet object. The function attributes are at index +/// `AttributeSet::FunctionIndex', the return value is at index +/// `AttributeSet::ReturnIndex', and the attributes for the parameters start at +/// index `1'. +class AttributeSet { +public: + enum AttrIndex { + ReturnIndex = 0U, + FunctionIndex = ~0U + }; +private: + friend class AttrBuilder; + friend class AttributeSetImpl; + template friend struct DenseMapInfo; + + /// \brief The attributes that we are managing. 
This can be null to represent + /// the empty attributes list. + AttributeSetImpl *pImpl; + + /// \brief The attributes for the specified index are returned. + AttributeSetNode *getAttributes(unsigned Idx) const; + + /// \brief Create an AttributeSet with the specified parameters in it. + static AttributeSet get(LLVMContext &C, + ArrayRef > Attrs); + static AttributeSet get(LLVMContext &C, + ArrayRef > Attrs); + + static AttributeSet getImpl(LLVMContext &C, + ArrayRef > Attrs); + + + explicit AttributeSet(AttributeSetImpl *LI) : pImpl(LI) {} +public: + AttributeSet() : pImpl(0) {} + + //===--------------------------------------------------------------------===// + // AttributeSet Construction and Mutation + //===--------------------------------------------------------------------===// + + /// \brief Return an AttributeSet with the specified parameters in it. + static AttributeSet get(LLVMContext &C, ArrayRef Attrs); + static AttributeSet get(LLVMContext &C, unsigned Idx, + ArrayRef Kind); + static AttributeSet get(LLVMContext &C, unsigned Idx, AttrBuilder &B); + + /// \brief Add an attribute to the attribute set at the given index. Since + /// attribute sets are immutable, this returns a new set. + AttributeSet addAttribute(LLVMContext &C, unsigned Idx, + Attribute::AttrKind Attr) const; + + /// \brief Add an attribute to the attribute set at the given index. Since + /// attribute sets are immutable, this returns a new set. + AttributeSet addAttribute(LLVMContext &C, unsigned Idx, + StringRef Kind) const; + + /// \brief Add attributes to the attribute set at the given index. Since + /// attribute sets are immutable, this returns a new set. + AttributeSet addAttributes(LLVMContext &C, unsigned Idx, + AttributeSet Attrs) const; + + /// \brief Remove the specified attribute at the specified index from this + /// attribute list. Since attribute lists are immutable, this returns the new + /// list. 
+ AttributeSet removeAttribute(LLVMContext &C, unsigned Idx, + Attribute::AttrKind Attr) const; + + /// \brief Remove the specified attributes at the specified index from this + /// attribute list. Since attribute lists are immutable, this returns the new + /// list. + AttributeSet removeAttributes(LLVMContext &C, unsigned Idx, + AttributeSet Attrs) const; + + //===--------------------------------------------------------------------===// + // AttributeSet Accessors + //===--------------------------------------------------------------------===// + + /// \brief Retrieve the LLVM context. + LLVMContext &getContext() const; + + /// \brief The attributes for the specified index are returned. + AttributeSet getParamAttributes(unsigned Idx) const; + + /// \brief The attributes for the ret value are returned. + AttributeSet getRetAttributes() const; + + /// \brief The function attributes are returned. + AttributeSet getFnAttributes() const; + + /// \brief Return true if the attribute exists at the given index. + bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const; + + /// \brief Return true if the attribute exists at the given index. + bool hasAttribute(unsigned Index, StringRef Kind) const; + + /// \brief Return true if attribute exists at the given index. + bool hasAttributes(unsigned Index) const; + + /// \brief Return true if the specified attribute is set for at least one + /// parameter or for the return value. + bool hasAttrSomewhere(Attribute::AttrKind Attr) const; + + /// \brief Return the attribute object that exists at the given index. + Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const; + + /// \brief Return the attribute object that exists at the given index. + Attribute getAttribute(unsigned Index, StringRef Kind) const; + + /// \brief Return the alignment for the specified function parameter. + unsigned getParamAlignment(unsigned Idx) const; + + /// \brief Get the stack alignment. 
+ unsigned getStackAlignment(unsigned Index) const; + + /// \brief Return the attributes at the index as a string. + std::string getAsString(unsigned Index, bool InAttrGrp = false) const; + + typedef ArrayRef::iterator iterator; + + iterator begin(unsigned Idx) const; + iterator end(unsigned Idx) const; + + /// operator==/!= - Provide equality predicates. + bool operator==(const AttributeSet &RHS) const { + return pImpl == RHS.pImpl; + } + bool operator!=(const AttributeSet &RHS) const { + return pImpl != RHS.pImpl; + } + + //===--------------------------------------------------------------------===// + // AttributeSet Introspection + //===--------------------------------------------------------------------===// + + // FIXME: Remove this. + uint64_t Raw(unsigned Index) const; + + /// \brief Return a raw pointer that uniquely identifies this attribute list. + void *getRawPointer() const { + return pImpl; + } + + /// \brief Return true if there are no attributes. + bool isEmpty() const { + return getNumSlots() == 0; + } + + /// \brief Return the number of slots used in this attribute list. This is + /// the number of arguments that have an attribute set on them (including the + /// function itself). + unsigned getNumSlots() const; + + /// \brief Return the index for the given slot. + uint64_t getSlotIndex(unsigned Slot) const; + + /// \brief Return the attributes at the given slot. + AttributeSet getSlotAttributes(unsigned Slot) const; + + void dump() const; +}; + +//===----------------------------------------------------------------------===// +/// \class +/// \brief Provide DenseMapInfo for AttributeSet. 
+template<> struct DenseMapInfo { + static inline AttributeSet getEmptyKey() { + uintptr_t Val = static_cast(-1); + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return AttributeSet(reinterpret_cast(Val)); + } + static inline AttributeSet getTombstoneKey() { + uintptr_t Val = static_cast(-2); + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return AttributeSet(reinterpret_cast(Val)); + } + static unsigned getHashValue(AttributeSet AS) { + return (unsigned((uintptr_t)AS.pImpl) >> 4) ^ + (unsigned((uintptr_t)AS.pImpl) >> 9); + } + static bool isEqual(AttributeSet LHS, AttributeSet RHS) { return LHS == RHS; } +}; + +//===----------------------------------------------------------------------===// +/// \class +/// \brief This class is used in conjunction with the Attribute::get method to +/// create an Attribute object. The object itself is uniquified. The Builder's +/// value, however, is not. So this can be used as a quick way to test for +/// equality, presence of attributes, etc. +class AttrBuilder { + std::bitset Attrs; + std::map TargetDepAttrs; + uint64_t Alignment; + uint64_t StackAlignment; +public: + AttrBuilder() : Attrs(0), Alignment(0), StackAlignment(0) {} + explicit AttrBuilder(uint64_t Val) + : Attrs(0), Alignment(0), StackAlignment(0) { + addRawValue(Val); + } + AttrBuilder(const Attribute &A) : Attrs(0), Alignment(0), StackAlignment(0) { + addAttribute(A); + } + AttrBuilder(AttributeSet AS, unsigned Idx); + AttrBuilder(const AttrBuilder &B) + : Attrs(B.Attrs), + TargetDepAttrs(B.TargetDepAttrs.begin(), B.TargetDepAttrs.end()), + Alignment(B.Alignment), StackAlignment(B.StackAlignment) {} + + void clear(); + + /// \brief Add an attribute to the builder. + AttrBuilder &addAttribute(Attribute::AttrKind Val); + + /// \brief Add the Attribute object to the builder. + AttrBuilder &addAttribute(Attribute A); + + /// \brief Add the target-dependent attribute to the builder. 
+ AttrBuilder &addAttribute(StringRef A, StringRef V = StringRef()); + + /// \brief Remove an attribute from the builder. + AttrBuilder &removeAttribute(Attribute::AttrKind Val); + + /// \brief Remove the attributes from the builder. + AttrBuilder &removeAttributes(AttributeSet A, uint64_t Index); + + /// \brief Remove the target-dependent attribute from the builder. + AttrBuilder &removeAttribute(StringRef A); + + /// \brief Add the attributes from the builder. + AttrBuilder &merge(const AttrBuilder &B); + + /// \brief Return true if the builder has the specified attribute. + bool contains(Attribute::AttrKind A) const { + assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!"); + return Attrs[A]; + } + + /// \brief Return true if the builder has the specified target-dependent + /// attribute. + bool contains(StringRef A) const; + + /// \brief Return true if the builder has IR-level attributes. + bool hasAttributes() const; + + /// \brief Return true if the builder has any attribute that's in the + /// specified attribute set. + bool hasAttributes(AttributeSet A, uint64_t Index) const; + + /// \brief Return true if the builder has an alignment attribute. + bool hasAlignmentAttr() const; + + /// \brief Retrieve the alignment attribute, if it exists. + uint64_t getAlignment() const { return Alignment; } + + /// \brief Retrieve the stack alignment attribute, if it exists. + uint64_t getStackAlignment() const { return StackAlignment; } + + /// \brief This turns an int alignment (which must be a power of 2) into the + /// form used internally in Attribute. + AttrBuilder &addAlignmentAttr(unsigned Align); + + /// \brief This turns an int stack alignment (which must be a power of 2) into + /// the form used internally in Attribute. + AttrBuilder &addStackAlignmentAttr(unsigned Align); + + /// \brief Return true if the builder contains no target-independent + /// attributes.
+ bool empty() const { return Attrs.none(); } + + // Iterators for target-dependent attributes. + typedef std::pair td_type; + typedef std::map::iterator td_iterator; + typedef std::map::const_iterator td_const_iterator; + + td_iterator td_begin() { return TargetDepAttrs.begin(); } + td_iterator td_end() { return TargetDepAttrs.end(); } + + td_const_iterator td_begin() const { return TargetDepAttrs.begin(); } + td_const_iterator td_end() const { return TargetDepAttrs.end(); } + + bool td_empty() const { return TargetDepAttrs.empty(); } + + /// \brief Remove attributes that are used on functions only. + void removeFunctionOnlyAttrs(); + + bool operator==(const AttrBuilder &B); + bool operator!=(const AttrBuilder &B) { + return !(*this == B); + } + + // FIXME: Remove this in 4.0. + + /// \brief Add the raw value to the internal representation. + AttrBuilder &addRawValue(uint64_t Val); +}; + +namespace AttributeFuncs { + +/// \brief Which attributes cannot be applied to a type. +AttributeSet typeIncompatible(Type *Ty, uint64_t Index); + +} // end AttributeFuncs namespace + +} // end llvm namespace + +#endif diff --git a/include/llvm/BasicBlock.h b/include/llvm/IR/BasicBlock.h similarity index 53% rename from include/llvm/BasicBlock.h rename to include/llvm/IR/BasicBlock.h index 02c2a96b6c64..ea5695a9e640 100644 --- a/include/llvm/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_BASICBLOCK_H -#define LLVM_BASICBLOCK_H +#ifndef LLVM_IR_BASICBLOCK_H +#define LLVM_IR_BASICBLOCK_H -#include "llvm/Instruction.h" -#include "llvm/SymbolTableListTraits.h" -#include "llvm/ADT/ilist.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/SymbolTableListTraits.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -29,19 +29,19 @@ class BlockAddress; template<> struct ilist_traits : public 
SymbolTableListTraits { - // createSentinel is used to get hold of a node that marks the end of - // the list... - // The sentinel is relative to this instance, so we use a non-static - // method. + + /// \brief Return a node that marks the end of a list. + /// + /// The sentinel is relative to this instance, so we use a non-static + /// method. Instruction *createSentinel() const { - // since i(p)lists always publicly derive from the corresponding - // traits, placing a data member in this class will augment i(p)list. - // But since the NodeTy is expected to publicly derive from - // ilist_node, there is a legal viable downcast from it - // to NodeTy. We use this trick to superpose i(p)list with a "ghostly" - // NodeTy, which becomes the sentinel. Dereferencing the sentinel is - // forbidden (save the ilist_node) so no one will ever notice - // the superposition. + // Since i(p)lists always publicly derive from their corresponding traits, + // placing a data member in this class will augment the i(p)list. But since + // the NodeTy is expected to publicly derive from ilist_node, + // there is a legal viable downcast from it to NodeTy. We use this trick to + // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the + // sentinel. Dereferencing the sentinel is forbidden (save the + // ilist_node), so no one will ever notice the superposition. return static_cast(&Sentinel); } static void destroySentinel(Instruction*) {} @@ -53,6 +53,8 @@ private: mutable ilist_half_node Sentinel; }; +/// \brief LLVM Basic Block Representation +/// /// This represents a single basic block in LLVM. A basic block is simply a /// container of instructions that execute sequentially. Basic blocks are Values /// because they are referenced by instructions such as branches and switch @@ -66,7 +68,6 @@ private: /// occur because it may be useful in the intermediate stage of constructing or /// modifying a program.
However, the verifier will ensure that basic blocks /// are "well formed". -/// @brief LLVM Basic Block Representation class BasicBlock : public Value, // Basic blocks are data objects also public ilist_node { friend class BlockAddress; @@ -82,102 +83,104 @@ private: BasicBlock(const BasicBlock &) LLVM_DELETED_FUNCTION; void operator=(const BasicBlock &) LLVM_DELETED_FUNCTION; - /// BasicBlock ctor - If the function parameter is specified, the basic block - /// is automatically inserted at either the end of the function (if - /// InsertBefore is null), or before the specified basic block. + /// \brief Constructor. /// + /// If the function parameter is specified, the basic block is automatically + /// inserted at either the end of the function (if InsertBefore is null), or + /// before the specified basic block. explicit BasicBlock(LLVMContext &C, const Twine &Name = "", Function *Parent = 0, BasicBlock *InsertBefore = 0); public: - /// getContext - Get the context in which this basic block lives. + /// \brief Get the context in which this basic block lives. LLVMContext &getContext() const; /// Instruction iterators... - typedef InstListType::iterator iterator; - typedef InstListType::const_iterator const_iterator; + typedef InstListType::iterator iterator; + typedef InstListType::const_iterator const_iterator; + typedef InstListType::reverse_iterator reverse_iterator; + typedef InstListType::const_reverse_iterator const_reverse_iterator; - /// Create - Creates a new BasicBlock. If the Parent parameter is specified, - /// the basic block is automatically inserted at either the end of the - /// function (if InsertBefore is 0), or before the specified basic block. + /// \brief Creates a new BasicBlock. + /// + /// If the Parent parameter is specified, the basic block is automatically + /// inserted at either the end of the function (if InsertBefore is 0), or + /// before the specified basic block. 
static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "", Function *Parent = 0,BasicBlock *InsertBefore = 0) { return new BasicBlock(Context, Name, Parent, InsertBefore); } ~BasicBlock(); - /// getParent - Return the enclosing method, or null if none - /// + /// \brief Return the enclosing function, or null if none. const Function *getParent() const { return Parent; } Function *getParent() { return Parent; } - /// getTerminator() - If this is a well formed basic block, then this returns - /// a pointer to the terminator instruction. If it is not, then you get a - /// null pointer back. - /// + /// \brief Returns the terminator instruction if the block is well formed or + /// null if the block is not well formed. TerminatorInst *getTerminator(); const TerminatorInst *getTerminator() const; - /// Returns a pointer to the first instructon in this block that is not a - /// PHINode instruction. When adding instruction to the beginning of the - /// basic block, they should be added before the returned value, not before - /// the first instruction, which might be PHI. - /// Returns 0 is there's no non-PHI instruction. + /// \brief Returns a pointer to the first instruction in this block that is + /// not a PHINode instruction. + /// + /// When adding instructions to the beginning of the basic block, they should + /// be added before the returned value, not before the first instruction, + /// which might be a PHI. Returns 0 if there's no non-PHI instruction. Instruction* getFirstNonPHI(); const Instruction* getFirstNonPHI() const { return const_cast(this)->getFirstNonPHI(); } - // Same as above, but also skip debug intrinsics. + /// \brief Returns a pointer to the first instruction in this block that is not + /// a PHINode or a debug intrinsic. Instruction* getFirstNonPHIOrDbg(); const Instruction* getFirstNonPHIOrDbg() const { return const_cast(this)->getFirstNonPHIOrDbg(); } - // Same as above, but also skip lifetime intrinsics.
+ /// \brief Returns a pointer to the first instruction in this block that is not + /// a PHINode, a debug intrinsic, or a lifetime intrinsic. Instruction* getFirstNonPHIOrDbgOrLifetime(); const Instruction* getFirstNonPHIOrDbgOrLifetime() const { return const_cast(this)->getFirstNonPHIOrDbgOrLifetime(); } - /// getFirstInsertionPt - Returns an iterator to the first instruction in this - /// block that is suitable for inserting a non-PHI instruction. In particular, - /// it skips all PHIs and LandingPad instructions. + /// \brief Returns an iterator to the first instruction in this block that is + /// suitable for inserting a non-PHI instruction. + /// + /// In particular, it skips all PHIs and LandingPad instructions. iterator getFirstInsertionPt(); const_iterator getFirstInsertionPt() const { return const_cast(this)->getFirstInsertionPt(); } - /// removeFromParent - This method unlinks 'this' from the containing - /// function, but does not delete it. - /// + /// \brief Unlink 'this' from the containing function, but do not delete it. void removeFromParent(); - /// eraseFromParent - This method unlinks 'this' from the containing function - /// and deletes it. - /// + /// \brief Unlink 'this' from the containing function and delete it. void eraseFromParent(); - /// moveBefore - Unlink this basic block from its current function and - /// insert it into the function that MovePos lives in, right before MovePos. + /// \brief Unlink this basic block from its current function and insert it + /// into the function that \p MovePos lives in, right before \p MovePos. void moveBefore(BasicBlock *MovePos); - /// moveAfter - Unlink this basic block from its current function and - /// insert it into the function that MovePos lives in, right after MovePos. + /// \brief Unlink this basic block from its current function and insert it + /// right after \p MovePos in the function \p MovePos lives in. 
void moveAfter(BasicBlock *MovePos); - /// getSinglePredecessor - If this basic block has a single predecessor block, - /// return the block, otherwise return a null pointer. + /// \brief Return the predecessor of this block if it has a single predecessor + /// block. Otherwise return a null pointer. BasicBlock *getSinglePredecessor(); const BasicBlock *getSinglePredecessor() const { return const_cast(this)->getSinglePredecessor(); } - /// getUniquePredecessor - If this basic block has a unique predecessor block, - /// return the block, otherwise return a null pointer. + /// \brief Return the predecessor of this block if it has a unique predecessor block. Otherwise return a null pointer. + /// /// Note that unique predecessor doesn't mean single edge, there can be - /// multiple edges from the unique predecessor to this block (for example - /// a switch statement with multiple cases having the same destination). + /// multiple edges from the unique predecessor to this block (for example a + /// switch statement with multiple cases having the same destination). BasicBlock *getUniquePredecessor(); const BasicBlock *getUniquePredecessor() const { return const_cast(this)->getUniquePredecessor(); @@ -191,6 +194,11 @@ public: inline iterator end () { return InstList.end(); } inline const_iterator end () const { return InstList.end(); } + inline reverse_iterator rbegin() { return InstList.rbegin(); } + inline const_reverse_iterator rbegin() const { return InstList.rbegin(); } + inline reverse_iterator rend () { return InstList.rend(); } + inline const_reverse_iterator rend () const { return InstList.rend(); } + inline size_t size() const { return InstList.size(); } inline bool empty() const { return InstList.empty(); } inline const Instruction &front() const { return InstList.front(); } @@ -198,49 +206,52 @@ public: inline const Instruction &back() const { return InstList.back(); } inline Instruction &back() { return InstList.back(); } - /// getInstList() - Return the underlying instruction list container.
You - /// need to access it directly if you want to modify it currently. + /// \brief Return the underlying instruction list container. /// + /// Currently you need to access the underlying instruction list container + /// directly if you want to modify it. const InstListType &getInstList() const { return InstList; } InstListType &getInstList() { return InstList; } - /// getSublistAccess() - returns pointer to member of instruction list + /// \brief Returns a pointer to a member of the instruction list. static iplist BasicBlock::*getSublistAccess(Instruction*) { return &BasicBlock::InstList; } - /// getValueSymbolTable() - returns pointer to symbol table (if any) + /// \brief Returns a pointer to the symbol table if one exists. ValueSymbolTable *getValueSymbolTable(); - /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const Value *V) { return V->getValueID() == Value::BasicBlockVal; } - /// dropAllReferences() - This function causes all the subinstructions to "let - /// go" of all references that they are maintaining. This allows one to - /// 'delete' a whole class at a time, even though there may be circular - /// references... first all references are dropped, and all use counts go to - /// zero. Then everything is delete'd for real. Note that no operations are - /// valid on an object that has "dropped all references", except operator - /// delete. + /// \brief Cause all subinstructions to "let go" of all the references that + /// said subinstructions are maintaining. /// + /// This allows one to 'delete' a whole class at a time, even though there may + /// be circular references... first all references are dropped, and all use + /// counts go to zero. Then everything is delete'd for real. Note that no + /// operations are valid on an object that has "dropped all references", + /// except operator delete. 
void dropAllReferences(); - /// removePredecessor - This method is used to notify a BasicBlock that the - /// specified Predecessor of the block is no longer able to reach it. This is - /// actually not used to update the Predecessor list, but is actually used to - /// update the PHI nodes that reside in the block. Note that this should be - /// called while the predecessor still refers to this block. + /// \brief Notify the BasicBlock that the predecessor \p Pred is no longer + /// able to reach it. /// + /// This is actually not used to update the Predecessor list, but is actually + /// used to update the PHI nodes that reside in the block. Note that this + /// should be called while the predecessor still refers to this block. void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false); - /// splitBasicBlock - This splits a basic block into two at the specified - /// instruction. Note that all instructions BEFORE the specified iterator - /// stay as part of the original basic block, an unconditional branch is added - /// to the original BB, and the rest of the instructions in the BB are moved - /// to the new BB, including the old terminator. The newly formed BasicBlock - /// is returned. This function invalidates the specified iterator. + /// \brief Split the basic block into two basic blocks at the specified + /// instruction. + /// + /// Note that all instructions BEFORE the specified iterator stay as part of + /// the original basic block, an unconditional branch is added to the original + /// BB, and the rest of the instructions in the BB are moved to the new BB, + /// including the old terminator. The newly formed BasicBlock is returned. + /// This function invalidates the specified iterator. /// /// Note that this only works on well formed basic blocks (must have a /// terminator), and 'I' must not be the end of instruction list (which would @@ -249,37 +260,39 @@ public: /// /// Also note that this doesn't preserve any passes. 
To split blocks while /// keeping loop information consistent, use the SplitBlock utility function. - /// BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = ""); - /// hasAddressTaken - returns true if there are any uses of this basic block - /// other than direct branches, switches, etc. to it. + /// \brief Returns true if there are any uses of this basic block other than + /// direct branches, switches, etc. to it. bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; } - /// replaceSuccessorsPhiUsesWith - Update all phi nodes in all our successors - /// to refer to basic block New instead of to us. + /// \brief Update all phi nodes in this basic block's successors to refer to + /// basic block \p New instead of to it. void replaceSuccessorsPhiUsesWith(BasicBlock *New); - /// isLandingPad - Return true if this basic block is a landing pad. I.e., - /// it's the destination of the 'unwind' edge of an invoke instruction. + /// \brief Return true if this basic block is a landing pad. + /// + /// Being a "landing pad" means that the basic block is the destination of + /// the 'unwind' edge of an invoke instruction. bool isLandingPad() const; - /// getLandingPadInst() - Return the landingpad instruction associated with - /// the landing pad. + /// \brief Return the landingpad instruction associated with the landing pad. LandingPadInst *getLandingPadInst(); const LandingPadInst *getLandingPadInst() const; private: - /// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress - /// objects using it. This is almost always 0, sometimes one, possibly but - /// almost never 2, and inconceivably 3 or more. + /// \brief Increment the internal refcount of the number of BlockAddresses + /// referencing this BasicBlock by \p Amt. + /// + /// The refcount is almost always 0, sometimes one, possibly but almost never + /// 2, and inconceivably 3 or more.
void AdjustBlockAddressRefCount(int Amt) { setValueSubclassData(getSubclassDataFromValue()+Amt); assert((int)(signed char)getSubclassDataFromValue() >= 0 && "Refcount wrap-around"); } - // Shadow Value::setValueSubclassData with a private forwarding method so that - // any future subclasses cannot accidentally use it. + /// \brief Shadow Value::setValueSubclassData with a private forwarding method + /// so that any future subclasses cannot accidentally use it. void setValueSubclassData(unsigned short D) { Value::setValueSubclassData(D); } diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt new file mode 100644 index 000000000000..2d52a89f9cd5 --- /dev/null +++ b/include/llvm/IR/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS Intrinsics.td) + +tablegen(LLVM Intrinsics.gen -gen-intrinsic) + +add_custom_target(intrinsics_gen ALL + DEPENDS ${llvm_builded_incs_dir}/IR/Intrinsics.gen) +set_target_properties(intrinsics_gen PROPERTIES FOLDER "Tablegenning") diff --git a/include/llvm/CallingConv.h b/include/llvm/IR/CallingConv.h similarity index 96% rename from include/llvm/CallingConv.h rename to include/llvm/IR/CallingConv.h index 053f4eb326f9..6f3ab2088655 100644 --- a/include/llvm/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CALLINGCONV_H -#define LLVM_CALLINGCONV_H +#ifndef LLVM_IR_CALLINGCONV_H +#define LLVM_IR_CALLINGCONV_H namespace llvm { @@ -47,6 +47,10 @@ namespace CallingConv { // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC). GHC = 10, + // HiPE - Calling convention used by the High-Performance Erlang Compiler + // (HiPE). + HiPE = 11, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. 
FirstTargetCC = 64, diff --git a/include/llvm/Constant.h b/include/llvm/IR/Constant.h similarity index 92% rename from include/llvm/Constant.h rename to include/llvm/IR/Constant.h index 0ddd1db6c010..26bad1dd1f79 100644 --- a/include/llvm/Constant.h +++ b/include/llvm/IR/Constant.h @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CONSTANT_H -#define LLVM_CONSTANT_H +#ifndef LLVM_IR_CONSTANT_H +#define LLVM_IR_CONSTANT_H -#include "llvm/User.h" +#include "llvm/IR/User.h" namespace llvm { class APInt; @@ -61,6 +61,9 @@ public: /// by getZeroValueForNegation. bool isNegativeZeroValue() const; + /// Return true if the value is negative zero or null value. + bool isZeroValue() const; + /// canTrap - Return true if evaluation of this constant could trap. This is /// true for things like constant expressions that could divide by zero. bool canTrap() const; @@ -100,7 +103,15 @@ public: /// 'this' is a constant expr. Constant *getAggregateElement(unsigned Elt) const; Constant *getAggregateElement(Constant *Elt) const; - + + /// getSplatValue - If this is a splat vector constant, meaning that all of + /// the elements have the same value, return that value. Otherwise return 0. + Constant *getSplatValue() const; + + /// If C is a constant integer then return its value, otherwise C must be a + /// vector of constant integers, all equal, and the common value is returned. + const APInt &getUniqueInteger() const; + /// destroyConstant - Called if some element of this constant is no longer /// valid. At this point only other constants may be on the use_list for this /// constant. Any constants on our Use list must also be destroy'd. 
The diff --git a/include/llvm/Constants.h b/include/llvm/IR/Constants.h similarity index 97% rename from include/llvm/Constants.h rename to include/llvm/IR/Constants.h index 7f94ef464ea4..ad258f9aca4d 100644 --- a/include/llvm/Constants.h +++ b/include/llvm/IR/Constants.h @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// // /// @file -/// This file contains the declarations for the subclasses of Constant, +/// This file contains the declarations for the subclasses of Constant, /// which represent the different flavors of constant values that live in LLVM. -/// Note that Constants are immutable (once created they never change) and are +/// Note that Constants are immutable (once created they never change) and are /// fully shared by structural equivalence. This means that two structurally /// equivalent constants will always have the same address. Constant's are /// created on demand as needed and never deleted: thus clients don't have to @@ -18,14 +18,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CONSTANTS_H -#define LLVM_CONSTANTS_H +#ifndef LLVM_IR_CONSTANTS_H +#define LLVM_IR_CONSTANTS_H -#include "llvm/Constant.h" -#include "llvm/OperandTraits.h" -#include "llvm/ADT/APInt.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/OperandTraits.h" namespace llvm { @@ -44,7 +44,7 @@ template struct ConvertConstantType; //===----------------------------------------------------------------------===// -/// This is the shared class of boolean and integer constants. This class +/// This is the shared class of boolean and integer constants. This class /// represents both boolean and integral constants. /// @brief Class for constant integers. 
class ConstantInt : public Constant { @@ -63,11 +63,11 @@ public: static ConstantInt *getFalse(LLVMContext &Context); static Constant *getTrue(Type *Ty); static Constant *getFalse(Type *Ty); - + /// If Ty is a vector type, return a Constant with a splat of the given /// value. Otherwise return a ConstantInt for the given value. static Constant *get(Type *Ty, uint64_t V, bool isSigned = false); - + /// Return a ConstantInt with the specified integer value for the specified /// type. If the type is wider than 64 bits, the value will be zero-extended /// to fit the type, unless isSigned is true, in which case the value will @@ -84,27 +84,27 @@ public: /// @brief Get a ConstantInt for a specific signed value. static ConstantInt *getSigned(IntegerType *Ty, int64_t V); static Constant *getSigned(Type *Ty, int64_t V); - + /// Return a ConstantInt with the specified value and an implied Type. The /// type is the integer type that corresponds to the bit width of the value. static ConstantInt *get(LLVMContext &Context, const APInt &V); /// Return a ConstantInt constructed from the string strStart with the given - /// radix. + /// radix. static ConstantInt *get(IntegerType *Ty, StringRef Str, uint8_t radix); - + /// If Ty is a vector type, return a Constant with a splat of the given /// value. Otherwise return a ConstantInt for the given value. static Constant *get(Type* Ty, const APInt& V); - + /// Return the constant as an APInt value reference. This allows clients to /// obtain a copy of the value, with all its precision in tact. /// @brief Return the constant's value. inline const APInt &getValue() const { return Val; } - + /// getBitWidth - Return the bitwidth of this constant. unsigned getBitWidth() const { return Val.getBitWidth(); } @@ -126,8 +126,8 @@ public: return Val.getSExtValue(); } - /// A helper method that can be used to determine if the constant contained - /// within is equal to a constant. 
This only works for very small values, + /// A helper method that can be used to determine if the constant contained + /// within is equal to a constant. This only works for very small values, /// because this is all that can be represented with all types. /// @brief Determine if this constant's value is same as an unsigned char. bool equalsInt(uint64_t V) const { @@ -141,11 +141,11 @@ public: return reinterpret_cast(Value::getType()); } - /// This static method returns true if the type Ty is big enough to - /// represent the value V. This can be used to avoid having the get method + /// This static method returns true if the type Ty is big enough to + /// represent the value V. This can be used to avoid having the get method /// assert when V is larger than Ty can represent. Note that there are two /// versions of this method, one for unsigned and one for signed integers. - /// Although ConstantInt canonicalizes everything to an unsigned integer, + /// Although ConstantInt canonicalizes everything to an unsigned integer, /// the signed version avoids callers having to convert a signed quantity /// to the appropriate unsigned type before calling the method. /// @returns true if V is a valid value for type Ty @@ -162,7 +162,7 @@ public: return Val == 0; } - /// This is just a convenience method to make client code smaller for a + /// This is just a convenience method to make client code smaller for a /// common case. It also correctly performs the comparison without the /// potential for an assertion from getZExtValue(). /// @brief Determine if the value is one. @@ -174,17 +174,17 @@ public: /// to true. /// @returns true iff this constant's bits are all set to true. /// @brief Determine if the value is all ones. - bool isMinusOne() const { + bool isMinusOne() const { return Val.isAllOnesValue(); } /// This function will return true iff this constant represents the largest /// value that may be represented by the constant's type. 
- /// @returns true iff this is the largest value that may be represented + /// @returns true iff this is the largest value that may be represented /// by this type. /// @brief Determine if the value is maximal. bool isMaxValue(bool isSigned) const { - if (isSigned) + if (isSigned) return Val.isMaxSignedValue(); else return Val.isMaxValue(); @@ -192,11 +192,11 @@ public: /// This function will return true iff this constant represents the smallest /// value that may be represented by this constant's type. - /// @returns true if this is the smallest value that may be represented by + /// @returns true if this is the smallest value that may be represented by /// this type. /// @brief Determine if the value is minimal. bool isMinValue(bool isSigned) const { - if (isSigned) + if (isSigned) return Val.isMinSignedValue(); else return Val.isMinValue(); @@ -248,7 +248,7 @@ public: /// method returns the negative zero constant for floating point or vector /// floating point types; for all other types, it returns the null value. static Constant *getZeroValueForNegation(Type *Ty); - + /// get() - This returns a ConstantFP, or a vector containing a splat of a /// ConstantFP, for the specified value in the specified type. This should /// only be used for simple constant values like 2.0/1.0 etc, that are @@ -258,7 +258,7 @@ public: static ConstantFP *get(LLVMContext &Context, const APFloat &V); static ConstantFP *getNegativeZero(Type* Ty); static ConstantFP *getInfinity(Type *Ty, bool Negative = false); - + /// isValueValidForType - return true if Ty is big enough to represent V. 
static bool isValueValidForType(Type *Ty, const APFloat &V); inline const APFloat &getValueAPF() const { return Val; } @@ -308,7 +308,7 @@ protected: } public: static ConstantAggregateZero *get(Type *Ty); - + virtual void destroyConstant(); /// getSequentialElement - If this CAZ has array or vector type, return a zero @@ -346,7 +346,7 @@ protected: public: // ConstantArray accessors static Constant *get(ArrayType *T, ArrayRef V); - + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -392,7 +392,7 @@ public: static Constant *getAnon(ArrayRef V, bool Packed = false) { return get(getTypeForElements(V, Packed), V); } - static Constant *getAnon(LLVMContext &Ctx, + static Constant *getAnon(LLVMContext &Ctx, ArrayRef V, bool Packed = false) { return get(getTypeForElements(Ctx, V, Packed), V); } @@ -405,7 +405,7 @@ public: static StructType *getTypeForElements(LLVMContext &Ctx, ArrayRef V, bool Packed = false); - + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -443,11 +443,11 @@ protected: public: // ConstantVector accessors static Constant *get(ArrayRef V); - + /// getSplat - Return a ConstantVector with the specified constant in each /// element. static Constant *getSplat(unsigned NumElts, Constant *Elt); - + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -512,7 +512,7 @@ public: return V->getValueID() == ConstantPointerNullVal; } }; - + //===----------------------------------------------------------------------===// /// ConstantDataSequential - A vector or array constant whose element type is a /// simple 1/2/4/8-byte integer or float/double, and whose elements are just @@ -527,7 +527,7 @@ class ConstantDataSequential : public Constant { /// DataElements - A pointer to the bytes underlying this constant (which is /// owned by the uniquing StringMap). 
const char *DataElements; - + /// Next - This forms a link list of ConstantDataSequential nodes that have /// the same value but different type. For example, 0,0,0,1 could be a 4 /// element array of i8, or a 1-element array of i32. They'll both end up in @@ -539,7 +539,7 @@ protected: explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data) : Constant(ty, VT, 0, 0), DataElements(Data), Next(0) {} ~ConstantDataSequential() { delete Next; } - + static Constant *getImpl(StringRef Bytes, Type *Ty); protected: @@ -548,13 +548,13 @@ protected: return User::operator new(s, 0); } public: - + /// isElementTypeCompatible - Return true if a ConstantDataSequential can be /// formed with a vector or array of the specified element type. /// ConstantDataArray only works with normal float and int types that are /// stored densely in memory, not with things like i42 or x86_f80. static bool isElementTypeCompatible(const Type *Ty); - + /// getElementAsInteger - If this is a sequential container of integers (of /// any size), return the specified element in the low bits of a uint64_t. uint64_t getElementAsInteger(unsigned i) const; @@ -566,26 +566,26 @@ public: /// getElementAsFloat - If this is an sequential container of floats, return /// the specified element as a float. float getElementAsFloat(unsigned i) const; - + /// getElementAsDouble - If this is an sequential container of doubles, return /// the specified element as a double. double getElementAsDouble(unsigned i) const; - + /// getElementAsConstant - Return a Constant for a specified index's element. /// Note that this has to compute a new constant to return, so it isn't as /// efficient as getElementAsInteger/Float/Double. Constant *getElementAsConstant(unsigned i) const; - + /// getType - Specialize the getType() method to always return a /// SequentialType, which reduces the amount of casting needed in parts of the /// compiler. 
inline SequentialType *getType() const { return reinterpret_cast(Value::getType()); } - + /// getElementType - Return the element type of the array/vector. Type *getElementType() const; - + /// getNumElements - Return the number of elements in the array or vector. unsigned getNumElements() const; @@ -594,14 +594,14 @@ public: /// byte. uint64_t getElementByteSize() const; - + /// isString - This method returns true if this is an array of i8. bool isString() const; - + /// isCString - This method returns true if the array "isString", ends with a /// nul byte, and does not contains any other nul bytes. bool isCString() const; - + /// getAsString - If this array is isString(), then this method returns the /// array as a StringRef. Otherwise, it asserts out. /// @@ -609,7 +609,7 @@ public: assert(isString() && "Not a string"); return getRawDataValues(); } - + /// getAsCString - If this array is isCString(), then this method returns the /// array (without the trailing null byte) as a StringRef. Otherwise, it /// asserts out. @@ -619,14 +619,14 @@ public: StringRef Str = getAsString(); return Str.substr(0, Str.size()-1); } - + /// getRawDataValues - Return the raw, underlying, bytes of this data. Note /// that this is an extremely tricky thing to work with, as it exposes the /// host endianness of the data elements. StringRef getRawDataValues() const; - + virtual void destroyConstant(); - + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// static bool classof(const Value *V) { @@ -656,7 +656,7 @@ protected: return User::operator new(s, 0); } public: - + /// get() constructors - Return a constant with array type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. 
@@ -666,7 +666,7 @@ public: static Constant *get(LLVMContext &Context, ArrayRef Elts); static Constant *get(LLVMContext &Context, ArrayRef Elts); static Constant *get(LLVMContext &Context, ArrayRef Elts); - + /// getString - This method constructs a CDS and initializes it with a text /// string. The default behavior (AddNull==true) causes a null terminator to /// be placed at the end of the array (increasing the length of the string by @@ -681,14 +681,14 @@ public: inline ArrayType *getType() const { return reinterpret_cast(Value::getType()); } - + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// static bool classof(const Value *V) { return V->getValueID() == ConstantDataArrayVal; } }; - + //===----------------------------------------------------------------------===// /// ConstantDataVector - A vector constant whose element type is a simple /// 1/2/4/8-byte integer or float/double, and whose elements are just simple @@ -708,7 +708,7 @@ protected: return User::operator new(s, 0); } public: - + /// get() constructors - Return a constant with vector type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. @@ -718,7 +718,7 @@ public: static Constant *get(LLVMContext &Context, ArrayRef Elts); static Constant *get(LLVMContext &Context, ArrayRef Elts); static Constant *get(LLVMContext &Context, ArrayRef Elts); - + /// getSplat - Return a ConstantVector with the specified constant in each /// element. The specified constant has to be a of a compatible type (i8/i16/ /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. @@ -727,14 +727,14 @@ public: /// getSplatValue - If this is a splat constant, meaning that all of the /// elements have the same value, return that value. Otherwise return NULL. 
Constant *getSplatValue() const; - + /// getType - Specialize the getType() method to always return a VectorType, /// which reduces the amount of casting needed in parts of the compiler. /// inline VectorType *getType() const { return reinterpret_cast(Value::getType()); } - + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// static bool classof(const Value *V) { @@ -753,20 +753,20 @@ class BlockAddress : public Constant { public: /// get - Return a BlockAddress for the specified function and basic block. static BlockAddress *get(Function *F, BasicBlock *BB); - + /// get - Return a BlockAddress for the specified basic block. The basic /// block must be embedded into a function. static BlockAddress *get(BasicBlock *BB); - + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - + Function *getFunction() const { return (Function*)Op<0>().get(); } BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); } - + virtual void destroyConstant(); virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); - + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Value *V) { return V->getValueID() == BlockAddressVal; @@ -779,7 +779,7 @@ struct OperandTraits : }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value) - + //===----------------------------------------------------------------------===// /// ConstantExpr - a constant value that is initialized with an expression using @@ -809,14 +809,14 @@ public: /// getAlignOf constant expr - computes the alignment of a type in a target /// independent way (Note: the return type is an i64). static Constant *getAlignOf(Type *Ty); - + /// getSizeOf constant expr - computes the (alloc) size of a type (in /// address-units, not bits) in a target independent way (Note: the return /// type is an i64). 
/// static Constant *getSizeOf(Type *Ty); - /// getOffsetOf constant expr - computes the offset of a struct field in a + /// getOffsetOf constant expr - computes the offset of a struct field in a /// target independent way (Note: the return type is an i64). /// static Constant *getOffsetOf(StructType *STy, unsigned FieldNo); @@ -825,7 +825,7 @@ public: /// which supports any aggregate type, and any Constant index. /// static Constant *getOffsetOf(Type *Ty, Constant *FieldNo); - + static Constant *getNeg(Constant *C, bool HasNUW = false, bool HasNSW =false); static Constant *getFNeg(Constant *C); static Constant *getNot(Constant *C); @@ -931,7 +931,7 @@ public: Type *Ty ///< The type to zext or bitcast C to ); - // @brief Create a SExt or BitCast cast constant expression + // @brief Create a SExt or BitCast cast constant expression static Constant *getSExtOrBitCast( Constant *C, ///< The constant to sext or bitcast Type *Ty ///< The type to sext or bitcast C to @@ -951,14 +951,14 @@ public: /// @brief Create a ZExt, Bitcast or Trunc for integer -> integer casts static Constant *getIntegerCast( - Constant *C, ///< The integer constant to be casted + Constant *C, ///< The integer constant to be casted Type *Ty, ///< The integer type to cast to bool isSigned ///< Whether C should be treated as signed or not ); /// @brief Create a FPExt, Bitcast or FPTrunc for fp -> fp casts static Constant *getFPCast( - Constant *C, ///< The integer constant to be casted + Constant *C, ///< The integer constant to be casted Type *Ty ///< The integer type to cast to ); @@ -1062,7 +1062,7 @@ public: /// getWithOperandReplaced - Return a constant expression identical to this /// one, but with the specified operand set to the specified value. Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const; - + /// getWithOperands - This returns the current constant expression with the /// operands replaced with the specified values. 
The specified array must /// have the same number of operands as our current one. @@ -1076,6 +1076,15 @@ public: /// current one. Constant *getWithOperands(ArrayRef Ops, Type *Ty) const; + /// getAsInstruction - Returns an Instruction which implements the same operation + /// as this ConstantExpr. The instruction is not linked to any basic block. + /// + /// A better approach to this could be to have a constructor for Instruction + /// which would take a ConstantExpr parameter, but that would have spread + /// implementation details of ConstantExpr outside of Constants.cpp, which + /// would make it harder to remove ConstantExprs altogether. + Instruction *getAsInstruction(); + virtual void destroyConstant(); virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); @@ -1083,7 +1092,7 @@ public: static inline bool classof(const Value *V) { return V->getValueID() == ConstantExprVal; } - + private: // Shadow Value::setValueSubclassData with a private forwarding method so that // subclasses cannot accidentally use it. @@ -1128,11 +1137,11 @@ public: /// getSequentialElement - If this Undef has array or vector type, return a /// undef with the right element type. UndefValue *getSequentialElement() const; - + /// getStructElement - If this undef has struct type, return a undef with the /// right element type for the specified element. UndefValue *getStructElement(unsigned Elt) const; - + /// getElementValue - Return an undef of the right value for the specified GEP /// index. 
UndefValue *getElementValue(Constant *C) const; diff --git a/include/llvm/DataLayout.h b/include/llvm/IR/DataLayout.h similarity index 85% rename from include/llvm/DataLayout.h rename to include/llvm/IR/DataLayout.h index 24ad05f17f39..547d857b7b73 100644 --- a/include/llvm/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -17,12 +17,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DATALAYOUT_H -#define LLVM_DATALAYOUT_H +#ifndef LLVM_IR_DATALAYOUT_H +#define LLVM_IR_DATALAYOUT_H -#include "llvm/Pass.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -39,6 +41,7 @@ class ArrayRef; /// Enum used to categorize the alignment types stored by LayoutAlignElem enum AlignTypeEnum { + INVALID_ALIGN = 0, ///< An invalid alignment INTEGER_ALIGN = 'i', ///< Integer type alignment VECTOR_ALIGN = 'v', ///< Vector type alignment FLOAT_ALIGN = 'f', ///< Floating point type alignment @@ -99,7 +102,7 @@ private: SmallVector LegalIntWidths; ///< Legal Integers. - /// Alignments- Where the primitive type alignment data is stored. + /// Alignments - Where the primitive type alignment data is stored. /// /// @sa init(). /// @note Could support multiple size pointer alignments, e.g., 32-bit @@ -148,9 +151,9 @@ private: return &align != &InvalidPointerElem; } - /// Initialise a DataLayout object with default values, ensure that the - /// target data pass is registered. - void init(); + /// Parses a target data specification string. Assert if the string is + /// malformed. + void parseSpecifier(StringRef LayoutDescription); public: /// Default ctor. @@ -162,23 +165,16 @@ public: /// Constructs a DataLayout from a specification string. See init(). 
explicit DataLayout(StringRef LayoutDescription) : ImmutablePass(ID) { - std::string errMsg = parseSpecifier(LayoutDescription, this); - assert(errMsg == "" && "Invalid target data layout string."); - (void)errMsg; + init(LayoutDescription); } - /// Parses a target data specification string. Returns an error message - /// if the string is malformed, or the empty string on success. Optionally - /// initialises a DataLayout object if passed a non-null pointer. - static std::string parseSpecifier(StringRef LayoutDescription, - DataLayout* td = 0); - /// Initialize target data from properties stored in the module. explicit DataLayout(const Module *M); DataLayout(const DataLayout &TD) : ImmutablePass(ID), LittleEndian(TD.isLittleEndian()), + StackNaturalAlign(TD.StackNaturalAlign), LegalIntWidths(TD.LegalIntWidths), Alignments(TD.Alignments), Pointers(TD.Pointers), @@ -187,6 +183,14 @@ public: ~DataLayout(); // Not virtual, do not subclass this class + /// DataLayout is an immutable pass, but holds state. This allows the pass + /// manager to clear its mutable state. + bool doFinalization(Module &M); + + /// Parse a data layout string (with fallback to default values). Ensure that + /// the data layout pass is registered. + void init(StringRef LayoutDescription); + /// Layout endianness... bool isLittleEndian() const { return LittleEndian; } bool isBigEndian() const { return !LittleEndian; } @@ -285,7 +289,8 @@ public: /// getTypeSizeInBits - Return the number of bits necessary to hold the /// specified type. For example, returns 36 for i36 and 80 for x86_fp80. - uint64_t getTypeSizeInBits(Type* Ty) const; + /// The type passed must have a size (Type::isSized() must return true). + uint64_t getTypeSizeInBits(Type *Ty) const; /// getTypeStoreSize - Return the maximum number of bytes that may be /// overwritten by storing the specified type. For example, returns 5 @@ -305,7 +310,7 @@ public: /// of the specified type, including alignment padding. 
This is the amount /// that alloca reserves for this type. For example, returns 12 or 16 for /// x86_fp80, depending on alignment. - uint64_t getTypeAllocSize(Type* Ty) const { + uint64_t getTypeAllocSize(Type *Ty) const { // Round up to the next alignment boundary. return RoundUpAlignment(getTypeStoreSize(Ty), getABITypeAlignment(Ty)); } @@ -314,7 +319,7 @@ public: /// objects of the specified type, including alignment padding; always a /// multiple of 8. This is the amount that alloca reserves for this type. /// For example, returns 96 or 128 for x86_fp80, depending on alignment. - uint64_t getTypeAllocSizeInBits(Type* Ty) const { + uint64_t getTypeAllocSizeInBits(Type *Ty) const { return 8*getTypeAllocSize(Ty); } @@ -326,19 +331,16 @@ public: /// an integer type of the specified bitwidth. unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const; - /// getCallFrameTypeAlignment - Return the minimum ABI-required alignment /// for the specified type when it is part of a call frame. unsigned getCallFrameTypeAlignment(Type *Ty) const; - /// getPrefTypeAlignment - Return the preferred stack/global alignment for /// the specified type. This is always at least as good as the ABI alignment. unsigned getPrefTypeAlignment(Type *Ty) const; /// getPreferredTypeAlignmentShift - Return the preferred alignment for the /// specified type, returned as log2 of the value (a shift amount). - /// unsigned getPreferredTypeAlignmentShift(Type *Ty) const; /// getIntPtrType - Return an integer type with size at least as big as that @@ -350,9 +352,12 @@ public: /// type. Type *getIntPtrType(Type *) const; + /// getSmallestLegalIntType - Return the smallest integer type with size at + /// least as big as Width bits. + Type *getSmallestLegalIntType(LLVMContext &C, unsigned Width = 0) const; + /// getIndexedOffset - return the offset from the beginning of the type for /// the specified indices. This is used to implement getelementptr. 
- /// uint64_t getIndexedOffset(Type *Ty, ArrayRef Indices) const; /// getStructLayout - Return a StructLayout object, indicating the alignment @@ -424,6 +429,49 @@ private: StructLayout(StructType *ST, const DataLayout &TD); }; + +// The implementation of this method is provided inline as it is particularly +// well suited to constant folding when called on a specific Type subclass. +inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { + assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); + switch (Ty->getTypeID()) { + case Type::LabelTyID: + return getPointerSizeInBits(0); + case Type::PointerTyID: + return getPointerSizeInBits(cast(Ty)->getAddressSpace()); + case Type::ArrayTyID: { + ArrayType *ATy = cast(Ty); + return ATy->getNumElements() * + getTypeAllocSizeInBits(ATy->getElementType()); + } + case Type::StructTyID: + // Get the layout annotation... which is lazily created on demand. + return getStructLayout(cast(Ty))->getSizeInBits(); + case Type::IntegerTyID: + return cast(Ty)->getBitWidth(); + case Type::HalfTyID: + return 16; + case Type::FloatTyID: + return 32; + case Type::DoubleTyID: + case Type::X86_MMXTyID: + return 64; + case Type::PPC_FP128TyID: + case Type::FP128TyID: + return 128; + // In memory objects this is always aligned to a higher boundary, but + // only 80 bits contain information. 
+ case Type::X86_FP80TyID: + return 80; + case Type::VectorTyID: { + VectorType *VTy = cast(Ty); + return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType()); + } + default: + llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); + } +} + } // End llvm namespace #endif diff --git a/include/llvm/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h similarity index 95% rename from include/llvm/DerivedTypes.h rename to include/llvm/IR/DerivedTypes.h index c862c2c8bb20..6c00f596badc 100644 --- a/include/llvm/DerivedTypes.h +++ b/include/llvm/IR/DerivedTypes.h @@ -15,12 +15,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DERIVED_TYPES_H -#define LLVM_DERIVED_TYPES_H +#ifndef LLVM_IR_DERIVEDTYPES_H +#define LLVM_IR_DERIVEDTYPES_H -#include "llvm/Type.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" namespace llvm { @@ -84,7 +84,7 @@ public: /// @brief Is this a power-of-2 byte-width IntegerType ? bool isPowerOf2ByteWidth() const; - // Methods for support type inquiry through isa, cast, and dyn_cast. + /// Methods for support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const Type *T) { return T->getTypeID() == IntegerTyID; } @@ -124,7 +124,7 @@ public: param_iterator param_begin() const { return ContainedTys + 1; } param_iterator param_end() const { return &ContainedTys[NumContainedTys]; } - // Parameter type accessors. + /// Parameter type accessors. Type *getParamType(unsigned i) const { return ContainedTys[i+1]; } /// getNumParams - Return the number of fixed parameters this function type @@ -132,7 +132,7 @@ public: /// unsigned getNumParams() const { return NumContainedTys - 1; } - // Methods for support type inquiry through isa, cast, and dyn_cast. + /// Methods for support type inquiry through isa, cast, and dyn_cast. 
static inline bool classof(const Type *T) { return T->getTypeID() == FunctionTyID; } @@ -154,7 +154,7 @@ public: bool indexValid(const Value *V) const; bool indexValid(unsigned Idx) const; - // Methods for support type inquiry through isa, cast, and dyn_cast. + /// Methods for support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const Type *T) { return T->getTypeID() == ArrayTyID || T->getTypeID() == StructTyID || @@ -190,7 +190,7 @@ class StructType : public CompositeType { StructType(LLVMContext &C) : CompositeType(C, StructTyID), SymbolTableEntry(0) {} enum { - // This is the contents of the SubClassData field. + /// This is the contents of the SubClassData field. SCDB_HasBody = 1, SCDB_Packed = 2, SCDB_IsLiteral = 4, @@ -282,14 +282,14 @@ public: /// specified struct. bool isLayoutIdentical(StructType *Other) const; - // Random access to the elements + /// Random access to the elements unsigned getNumElements() const { return NumContainedTys; } Type *getElementType(unsigned N) const { assert(N < NumContainedTys && "Element number out of range!"); return ContainedTys[N]; } - // Methods for support type inquiry through isa, cast, and dyn_cast. + /// Methods for support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const Type *T) { return T->getTypeID() == StructTyID; } @@ -318,7 +318,7 @@ protected: public: Type *getElementType() const { return ContainedTys[0]; } - // Methods for support type inquiry through isa, cast, and dyn_cast. + /// Methods for support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const Type *T) { return T->getTypeID() == ArrayTyID || T->getTypeID() == PointerTyID || @@ -347,7 +347,7 @@ public: uint64_t getNumElements() const { return NumElements; } - // Methods for support type inquiry through isa, cast, and dyn_cast. + /// Methods for support type inquiry through isa, cast, and dyn_cast. 
static inline bool classof(const Type *T) { return T->getTypeID() == ArrayTyID; } @@ -413,7 +413,7 @@ public: return NumElements * getElementType()->getPrimitiveSizeInBits(); } - // Methods for support type inquiry through isa, cast, and dyn_cast. + /// Methods for support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const Type *T) { return T->getTypeID() == VectorTyID; } @@ -444,7 +444,7 @@ public: /// @brief Return the address space of the Pointer type. inline unsigned getAddressSpace() const { return getSubclassData(); } - // Implement support type inquiry through isa, cast, and dyn_cast. + /// Implement support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const Type *T) { return T->getTypeID() == PointerTyID; } diff --git a/include/llvm/Function.h b/include/llvm/IR/Function.h similarity index 80% rename from include/llvm/Function.h rename to include/llvm/IR/Function.h index e211e9ab52a8..f97929f65854 100644 --- a/include/llvm/Function.h +++ b/include/llvm/IR/Function.h @@ -15,14 +15,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_FUNCTION_H -#define LLVM_FUNCTION_H +#ifndef LLVM_IR_FUNCTION_H +#define LLVM_IR_FUNCTION_H -#include "llvm/GlobalValue.h" -#include "llvm/CallingConv.h" -#include "llvm/BasicBlock.h" -#include "llvm/Argument.h" -#include "llvm/Attributes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Compiler.h" namespace llvm { @@ -85,11 +85,11 @@ private: BasicBlockListType BasicBlocks; ///< The basic blocks mutable ArgumentListType ArgumentList; ///< The formal arguments ValueSymbolTable *SymTab; ///< Symbol table of args/instructions - AttrListPtr AttributeList; ///< Parameter attributes + AttributeSet AttributeSets; ///< Parameter attributes // HasLazyArguments is stored in Value::SubclassData. 
/*bool HasLazyArguments;*/ - + // The Calling Convention is stored in Value::SubclassData. /*CallingConv::ID CallingConvention;*/ @@ -113,6 +113,10 @@ private: Function(const Function&) LLVM_DELETED_FUNCTION; void operator=(const Function&) LLVM_DELETED_FUNCTION; + /// Do the actual lookup of an intrinsic ID when the query could not be + /// answered from the cache. + unsigned lookupIntrinsicID() const LLVM_READONLY; + /// Function ctor - If the (optional) Module argument is specified, the /// function is automatically inserted into the end of the function list for /// the module. @@ -131,7 +135,7 @@ public: Type *getReturnType() const; // Return the type of the ret val FunctionType *getFunctionType() const; // Return the FunctionType for me - /// getContext - Return a pointer to the LLVMContext associated with this + /// getContext - Return a pointer to the LLVMContext associated with this /// function, or NULL if this function is not bound to a context yet. LLVMContext &getContext() const; @@ -141,13 +145,15 @@ public: /// getIntrinsicID - This method returns the ID number of the specified /// function, or Intrinsic::not_intrinsic if the function is not an - /// instrinsic, or if the pointer is null. This value is always defined to be + /// intrinsic, or if the pointer is null. This value is always defined to be /// zero to allow easy checking for whether a function is intrinsic or not. /// The particular intrinsic functions which correspond to this value are - /// defined in llvm/Intrinsics.h. + /// defined in llvm/Intrinsics.h. Results are cached in the LLVM context, + /// subsequent requests for the same ID return results much faster from the + /// cache. /// unsigned getIntrinsicID() const LLVM_READONLY; - bool isIntrinsic() const { return getIntrinsicID() != 0; } + bool isIntrinsic() const { return getName().startswith("llvm."); } /// getCallingConv()/setCallingConv(CC) - These method get and set the /// calling convention of this function. 
The enum values for the known @@ -159,33 +165,36 @@ public: setValueSubclassData((getSubclassDataFromValue() & 1) | (static_cast(CC) << 1)); } - + /// getAttributes - Return the attribute list for this Function. /// - const AttrListPtr &getAttributes() const { return AttributeList; } + AttributeSet getAttributes() const { return AttributeSets; } /// setAttributes - Set the attribute list for this Function. /// - void setAttributes(const AttrListPtr &attrs) { AttributeList = attrs; } + void setAttributes(AttributeSet attrs) { AttributeSets = attrs; } - /// getFnAttributes - Return the function attributes for querying. + /// addFnAttr - Add function attributes to this function. /// - Attributes getFnAttributes() const { - return AttributeList.getFnAttributes(); + void addFnAttr(Attribute::AttrKind N) { + setAttributes(AttributeSets.addAttribute(getContext(), + AttributeSet::FunctionIndex, N)); } /// addFnAttr - Add function attributes to this function. /// - void addFnAttr(Attributes::AttrVal N) { - // Function Attributes are stored at ~0 index - addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), N)); + void addFnAttr(StringRef Kind) { + setAttributes( + AttributeSets.addAttribute(getContext(), + AttributeSet::FunctionIndex, Kind)); } - /// removeFnAttr - Remove function attributes from this function. - /// - void removeFnAttr(Attributes N) { - // Function Attributes are stored at ~0 index - removeAttribute(~0U, N); + /// \brief Return true if the function has the attribute. + bool hasFnAttribute(Attribute::AttrKind Kind) const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Kind); + } + bool hasFnAttribute(StringRef Kind) const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Kind); } /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm @@ -195,68 +204,74 @@ public: void setGC(const char *Str); void clearGC(); + /// @brief adds the attribute to the list of attributes. 
+ void addAttribute(unsigned i, Attribute::AttrKind attr); - /// getRetAttributes - Return the return attributes for querying. - Attributes getRetAttributes() const { - return AttributeList.getRetAttributes(); - } + /// @brief adds the attributes to the list of attributes. + void addAttributes(unsigned i, AttributeSet attrs); - /// getParamAttributes - Return the parameter attributes for querying. - Attributes getParamAttributes(unsigned Idx) const { - return AttributeList.getParamAttributes(Idx); - } - - /// addAttribute - adds the attribute to the list of attributes. - void addAttribute(unsigned i, Attributes attr); - - /// removeAttribute - removes the attribute from the list of attributes. - void removeAttribute(unsigned i, Attributes attr); + /// @brief removes the attributes from the list of attributes. + void removeAttributes(unsigned i, AttributeSet attr); /// @brief Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned i) const { - return AttributeList.getParamAlignment(i); + return AttributeSets.getParamAlignment(i); } /// @brief Determine if the function does not access memory. bool doesNotAccessMemory() const { - return getFnAttributes().hasAttribute(Attributes::ReadNone); + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReadNone); } void setDoesNotAccessMemory() { - addFnAttr(Attributes::ReadNone); + addFnAttr(Attribute::ReadNone); } /// @brief Determine if the function does not access or only reads memory. bool onlyReadsMemory() const { return doesNotAccessMemory() || - getFnAttributes().hasAttribute(Attributes::ReadOnly); + AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReadOnly); } void setOnlyReadsMemory() { - addFnAttr(Attributes::ReadOnly); + addFnAttr(Attribute::ReadOnly); } /// @brief Determine if the function cannot return. 
bool doesNotReturn() const { - return getFnAttributes().hasAttribute(Attributes::NoReturn); + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoReturn); } void setDoesNotReturn() { - addFnAttr(Attributes::NoReturn); + addFnAttr(Attribute::NoReturn); } /// @brief Determine if the function cannot unwind. bool doesNotThrow() const { - return getFnAttributes().hasAttribute(Attributes::NoUnwind); + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoUnwind); } void setDoesNotThrow() { - addFnAttr(Attributes::NoUnwind); + addFnAttr(Attribute::NoUnwind); + } + + /// @brief Determine if the call cannot be duplicated. + bool cannotDuplicate() const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoDuplicate); + } + void setCannotDuplicate() { + addFnAttr(Attribute::NoDuplicate); } /// @brief True if the ABI mandates (or the user requested) that this /// function be in a unwind table. bool hasUWTable() const { - return getFnAttributes().hasAttribute(Attributes::UWTable); + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::UWTable); } void setHasUWTable() { - addFnAttr(Attributes::UWTable); + addFnAttr(Attribute::UWTable); } /// @brief True if this function needs an unwind table. @@ -264,28 +279,28 @@ public: return hasUWTable() || !doesNotThrow(); } - /// @brief Determine if the function returns a structure through first + /// @brief Determine if the function returns a structure through first /// pointer argument. bool hasStructRetAttr() const { - return getParamAttributes(1).hasAttribute(Attributes::StructRet); + return AttributeSets.hasAttribute(1, Attribute::StructRet); } /// @brief Determine if the parameter does not alias other parameters. /// @param n The parameter to check. 
1 is the first parameter, 0 is the return bool doesNotAlias(unsigned n) const { - return getParamAttributes(n).hasAttribute(Attributes::NoAlias); + return AttributeSets.hasAttribute(n, Attribute::NoAlias); } void setDoesNotAlias(unsigned n) { - addAttribute(n, Attributes::get(getContext(), Attributes::NoAlias)); + addAttribute(n, Attribute::NoAlias); } /// @brief Determine if the parameter can be captured. /// @param n The parameter to check. 1 is the first parameter, 0 is the return bool doesNotCapture(unsigned n) const { - return getParamAttributes(n).hasAttribute(Attributes::NoCapture); + return AttributeSets.hasAttribute(n, Attribute::NoCapture); } void setDoesNotCapture(unsigned n) { - addAttribute(n, Attributes::get(getContext(), Attributes::NoCapture)); + addAttribute(n, Attribute::NoCapture); } /// copyAttributesFrom - copy all additional attributes (those not needed to diff --git a/include/llvm/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h similarity index 95% rename from include/llvm/GlobalAlias.h rename to include/llvm/IR/GlobalAlias.h index d0f014733fce..883814a32371 100644 --- a/include/llvm/GlobalAlias.h +++ b/include/llvm/IR/GlobalAlias.h @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_GLOBAL_ALIAS_H -#define LLVM_GLOBAL_ALIAS_H +#ifndef LLVM_IR_GLOBALALIAS_H +#define LLVM_IR_GLOBALALIAS_H -#include "llvm/GlobalValue.h" -#include "llvm/OperandTraits.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/OperandTraits.h" namespace llvm { diff --git a/include/llvm/GlobalValue.h b/include/llvm/IR/GlobalValue.h similarity index 99% rename from include/llvm/GlobalValue.h rename to include/llvm/IR/GlobalValue.h index 7f7f74b1e2da..f398bc1b87ab 100644 --- a/include/llvm/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -15,10 +15,10 @@ // 
//===----------------------------------------------------------------------===// -#ifndef LLVM_GLOBALVALUE_H -#define LLVM_GLOBALVALUE_H +#ifndef LLVM_IR_GLOBALVALUE_H +#define LLVM_IR_GLOBALVALUE_H -#include "llvm/Constant.h" +#include "llvm/IR/Constant.h" namespace llvm { diff --git a/include/llvm/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h similarity index 80% rename from include/llvm/GlobalVariable.h rename to include/llvm/IR/GlobalVariable.h index b9d3f68642f4..bfed50786ea0 100644 --- a/include/llvm/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -17,13 +17,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_GLOBAL_VARIABLE_H -#define LLVM_GLOBAL_VARIABLE_H +#ifndef LLVM_IR_GLOBALVARIABLE_H +#define LLVM_IR_GLOBALVARIABLE_H -#include "llvm/GlobalValue.h" -#include "llvm/OperandTraits.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/OperandTraits.h" namespace llvm { @@ -40,9 +40,14 @@ class GlobalVariable : public GlobalValue, public ilist_node { void setParent(Module *parent); - bool isConstantGlobal : 1; // Is this a global constant? - unsigned threadLocalMode : 3; // Is this symbol "Thread Local", - // if so, what is the desired model? + bool isConstantGlobal : 1; // Is this a global constant? + unsigned threadLocalMode : 3; // Is this symbol "Thread Local", + // if so, what is the desired + // model? + bool isExternallyInitializedConstant : 1; // Is this a global whose value + // can change from its initial + // value before global + // initializers are run? public: // allocate space for exactly one operand @@ -62,15 +67,15 @@ public: /// automatically inserted into the end of the specified modules global list. 
GlobalVariable(Type *Ty, bool isConstant, LinkageTypes Linkage, Constant *Initializer = 0, const Twine &Name = "", - ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0); + ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0, + bool isExternallyInitialized = false); /// GlobalVariable ctor - This creates a global and inserts it before the /// specified other global. GlobalVariable(Module &M, Type *Ty, bool isConstant, LinkageTypes Linkage, Constant *Initializer, - const Twine &Name = "", - GlobalVariable *InsertBefore = 0, - ThreadLocalMode = NotThreadLocal, - unsigned AddressSpace = 0); + const Twine &Name = "", GlobalVariable *InsertBefore = 0, + ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0, + bool isExternallyInitialized = false); ~GlobalVariable() { NumOperands = 1; // FIXME: needed by operator delete @@ -105,7 +110,10 @@ public: return hasInitializer() && // The initializer of a global variable with weak linkage may change at // link time. - !mayBeOverridden(); + !mayBeOverridden() && + // The initializer of a global variable with the externally_initialized + // marker may change at runtime before C++ initializers are evaluated. + !isExternallyInitialized(); } /// hasUniqueInitializer - Whether the global variable has an initializer, and @@ -118,7 +126,11 @@ public: // instead. It is wrong to modify the initializer of a global variable // with *_odr linkage because then different instances of the global may // have different initializers, breaking the One Definition Rule. - !isWeakForLinker(); + !isWeakForLinker() && + // It is not safe to modify initializers of global variables with the + // external_initializer marker since the value may be changed at runtime + // before C++ initializers are evaluated. + !isExternallyInitialized(); } /// getInitializer - Return the initializer for this global variable. 
It is @@ -155,6 +167,13 @@ public: return static_cast(threadLocalMode); } + bool isExternallyInitialized() const { + return isExternallyInitializedConstant; + } + void setExternallyInitialized(bool Val) { + isExternallyInitializedConstant = Val; + } + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalVariable) from the GlobalVariable Src to this one. void copyAttributesFrom(const GlobalValue *Src); diff --git a/include/llvm/IRBuilder.h b/include/llvm/IR/IRBuilder.h similarity index 82% rename from include/llvm/IRBuilder.h rename to include/llvm/IR/IRBuilder.h index f63a16051e30..1c71d0a90146 100644 --- a/include/llvm/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -12,25 +12,27 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_IRBUILDER_H -#define LLVM_IRBUILDER_H +#ifndef LLVM_IR_IRBUILDER_H +#define LLVM_IR_IRBUILDER_H -#include "llvm/Instructions.h" -#include "llvm/BasicBlock.h" -#include "llvm/DataLayout.h" -#include "llvm/LLVMContext.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ConstantFolder.h" namespace llvm { class MDNode; -/// IRBuilderDefaultInserter - This provides the default implementation of the -/// IRBuilder 'InsertHelper' method that is called whenever an instruction is -/// created by IRBuilder and needs to be inserted. By default, this inserts the -/// instruction at the insertion point. +/// \brief This provides the default implementation of the IRBuilder +/// 'InsertHelper' method that is called whenever an instruction is created by +/// IRBuilder and needs to be inserted. +/// +/// By default, this inserts the instruction at the insertion point. 
template class IRBuilderDefaultInserter { protected: @@ -42,7 +44,7 @@ protected: } }; -/// IRBuilderBase - Common base class shared among various IRBuilders. +/// \brief Common base class shared among various IRBuilders. class IRBuilderBase { DebugLoc CurDbgLocation; protected: @@ -60,8 +62,8 @@ public: // Builder configuration methods //===--------------------------------------------------------------------===// - /// ClearInsertionPoint - Clear the insertion point: created instructions will - /// not be inserted into a block. + /// \brief Clear the insertion point: created instructions will not be + /// inserted into a block. void ClearInsertionPoint() { BB = 0; } @@ -70,30 +72,30 @@ public: BasicBlock::iterator GetInsertPoint() const { return InsertPt; } LLVMContext &getContext() const { return Context; } - /// SetInsertPoint - This specifies that created instructions should be - /// appended to the end of the specified block. + /// \brief This specifies that created instructions should be appended to the + /// end of the specified block. void SetInsertPoint(BasicBlock *TheBB) { BB = TheBB; InsertPt = BB->end(); } - /// SetInsertPoint - This specifies that created instructions should be - /// inserted before the specified instruction. + /// \brief This specifies that created instructions should be inserted before + /// the specified instruction. void SetInsertPoint(Instruction *I) { BB = I->getParent(); InsertPt = I; SetCurrentDebugLocation(I->getDebugLoc()); } - /// SetInsertPoint - This specifies that created instructions should be - /// inserted at the specified point. + /// \brief This specifies that created instructions should be inserted at the + /// specified point. void SetInsertPoint(BasicBlock *TheBB, BasicBlock::iterator IP) { BB = TheBB; InsertPt = IP; } - /// SetInsertPoint(Use) - Find the nearest point that dominates this use, and - /// specify that created instructions should be inserted at this point. 
+ /// \brief Find the nearest point that dominates this use, and specify that + /// created instructions should be inserted at this point. void SetInsertPoint(Use &U) { Instruction *UseInst = cast(U.getUser()); if (PHINode *Phi = dyn_cast(UseInst)) { @@ -105,25 +107,23 @@ public: SetInsertPoint(UseInst); } - /// SetCurrentDebugLocation - Set location information used by debugging - /// information. + /// \brief Set location information used by debugging information. void SetCurrentDebugLocation(const DebugLoc &L) { CurDbgLocation = L; } - /// getCurrentDebugLocation - Get location information used by debugging - /// information. + /// \brief Get location information used by debugging information. DebugLoc getCurrentDebugLocation() const { return CurDbgLocation; } - /// SetInstDebugLocation - If this builder has a current debug location, set - /// it on the specified instruction. + /// \brief If this builder has a current debug location, set it on the + /// specified instruction. void SetInstDebugLocation(Instruction *I) const { if (!CurDbgLocation.isUnknown()) I->setDebugLoc(CurDbgLocation); } - /// getCurrentFunctionReturnType - Get the return type of the current function - /// that we're emitting into. + /// \brief Get the return type of the current function that we're emitting + /// into. Type *getCurrentFunctionReturnType() const; /// InsertPoint - A saved insertion point. @@ -132,35 +132,33 @@ public: BasicBlock::iterator Point; public: - /// Creates a new insertion point which doesn't point to anything. + /// \brief Creates a new insertion point which doesn't point to anything. InsertPoint() : Block(0) {} - /// Creates a new insertion point at the given location. + /// \brief Creates a new insertion point at the given location. InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint) : Block(InsertBlock), Point(InsertPoint) {} - /// isSet - Returns true if this insert point is set. + /// \brief Returns true if this insert point is set. 
bool isSet() const { return (Block != 0); } llvm::BasicBlock *getBlock() const { return Block; } llvm::BasicBlock::iterator getPoint() const { return Point; } }; - /// saveIP - Returns the current insert point. + /// \brief Returns the current insert point. InsertPoint saveIP() const { return InsertPoint(GetInsertBlock(), GetInsertPoint()); } - /// saveAndClearIP - Returns the current insert point, clearing it - /// in the process. + /// \brief Returns the current insert point, clearing it in the process. InsertPoint saveAndClearIP() { InsertPoint IP(GetInsertBlock(), GetInsertPoint()); ClearInsertionPoint(); return IP; } - /// restoreIP - Sets the current insert point to a previously-saved - /// location. + /// \brief Sets the current insert point to a previously-saved location. void restoreIP(InsertPoint IP) { if (IP.isSet()) SetInsertPoint(IP.getBlock(), IP.getPoint()); @@ -172,49 +170,50 @@ public: // Miscellaneous creation methods. //===--------------------------------------------------------------------===// - /// CreateGlobalString - Make a new global variable with an initializer that - /// has array of i8 type filled in with the nul terminated string value - /// specified. The new global variable will be marked mergable with any - /// others of the same contents. If Name is specified, it is the name of the - /// global variable created. + /// \brief Make a new global variable with initializer type i8* + /// + /// Make a new global variable with an initializer that has array of i8 type + /// filled in with the null terminated string value specified. The new global + /// variable will be marked mergable with any others of the same contents. If + /// Name is specified, it is the name of the global variable created. Value *CreateGlobalString(StringRef Str, const Twine &Name = ""); - /// getInt1 - Get a constant value representing either true or false. + /// \brief Get a constant value representing either true or false. 
ConstantInt *getInt1(bool V) { return ConstantInt::get(getInt1Ty(), V); } - /// getTrue - Get the constant value for i1 true. + /// \brief Get the constant value for i1 true. ConstantInt *getTrue() { return ConstantInt::getTrue(Context); } - /// getFalse - Get the constant value for i1 false. + /// \brief Get the constant value for i1 false. ConstantInt *getFalse() { return ConstantInt::getFalse(Context); } - /// getInt8 - Get a constant 8-bit value. + /// \brief Get a constant 8-bit value. ConstantInt *getInt8(uint8_t C) { return ConstantInt::get(getInt8Ty(), C); } - /// getInt16 - Get a constant 16-bit value. + /// \brief Get a constant 16-bit value. ConstantInt *getInt16(uint16_t C) { return ConstantInt::get(getInt16Ty(), C); } - /// getInt32 - Get a constant 32-bit value. + /// \brief Get a constant 32-bit value. ConstantInt *getInt32(uint32_t C) { return ConstantInt::get(getInt32Ty(), C); } - /// getInt64 - Get a constant 64-bit value. + /// \brief Get a constant 64-bit value. ConstantInt *getInt64(uint64_t C) { return ConstantInt::get(getInt64Ty(), C); } - /// getInt - Get a constant integer value. + /// \brief Get a constant integer value. ConstantInt *getInt(const APInt &AI) { return ConstantInt::get(Context, AI); } @@ -223,50 +222,52 @@ public: // Type creation methods //===--------------------------------------------------------------------===// - /// getInt1Ty - Fetch the type representing a single bit + /// \brief Fetch the type representing a single bit IntegerType *getInt1Ty() { return Type::getInt1Ty(Context); } - /// getInt8Ty - Fetch the type representing an 8-bit integer. + /// \brief Fetch the type representing an 8-bit integer. IntegerType *getInt8Ty() { return Type::getInt8Ty(Context); } - /// getInt16Ty - Fetch the type representing a 16-bit integer. + /// \brief Fetch the type representing a 16-bit integer. IntegerType *getInt16Ty() { return Type::getInt16Ty(Context); } - /// getInt32Ty - Fetch the type resepresenting a 32-bit integer. 
+ /// \brief Fetch the type representing a 32-bit integer. IntegerType *getInt32Ty() { return Type::getInt32Ty(Context); } - /// getInt64Ty - Fetch the type representing a 64-bit integer. + /// \brief Fetch the type representing a 64-bit integer. IntegerType *getInt64Ty() { return Type::getInt64Ty(Context); } - /// getFloatTy - Fetch the type representing a 32-bit floating point value. + /// \brief Fetch the type representing a 32-bit floating point value. Type *getFloatTy() { return Type::getFloatTy(Context); } - /// getDoubleTy - Fetch the type representing a 64-bit floating point value. + /// \brief Fetch the type representing a 64-bit floating point value. Type *getDoubleTy() { return Type::getDoubleTy(Context); } - /// getVoidTy - Fetch the type representing void. + /// \brief Fetch the type representing void. Type *getVoidTy() { return Type::getVoidTy(Context); } + /// \brief Fetch the type representing a pointer to an 8-bit integer value. PointerType *getInt8PtrTy(unsigned AddrSpace = 0) { return Type::getInt8PtrTy(Context, AddrSpace); } + /// \brief Fetch the type representing a pointer to an integer value. IntegerType* getIntPtrTy(DataLayout *DL, unsigned AddrSpace = 0) { return DL->getIntPtrType(Context, AddrSpace); } @@ -275,9 +276,11 @@ public: // Intrinsic creation methods //===--------------------------------------------------------------------===// - /// CreateMemSet - Create and insert a memset to the specified pointer and the - /// specified value. If the pointer isn't an i8*, it will be converted. If a - /// TBAA tag is specified, it will be added to the instruction. + /// \brief Create and insert a memset to the specified pointer and the + /// specified value. + /// + /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. 
CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, bool isVolatile = false, MDNode *TBAATag = 0) { return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, TBAATag); @@ -286,7 +289,8 @@ public: CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, bool isVolatile = false, MDNode *TBAATag = 0); - /// CreateMemCpy - Create and insert a memcpy between the specified pointers. + /// \brief Create and insert a memcpy between the specified pointers. + /// /// If the pointers aren't i8*, they will be converted. If a TBAA tag is /// specified, it will be added to the instruction. CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align, @@ -300,9 +304,11 @@ public: bool isVolatile = false, MDNode *TBAATag = 0, MDNode *TBAAStructTag = 0); - /// CreateMemMove - Create and insert a memmove between the specified - /// pointers. If the pointers aren't i8*, they will be converted. If a TBAA - /// tag is specified, it will be added to the instruction. + /// \brief Create and insert a memmove between the specified + /// pointers. + /// + /// If the pointers aren't i8*, they will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align, bool isVolatile = false, MDNode *TBAATag = 0) { return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag); @@ -311,25 +317,30 @@ public: CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, bool isVolatile = false, MDNode *TBAATag = 0); - /// CreateLifetimeStart - Create a lifetime.start intrinsic. If the pointer - /// isn't i8* it will be converted. + /// \brief Create a lifetime.start intrinsic. + /// + /// If the pointer isn't i8* it will be converted. CallInst *CreateLifetimeStart(Value *Ptr, ConstantInt *Size = 0); - /// CreateLifetimeEnd - Create a lifetime.end intrinsic. If the pointer isn't - /// i8* it will be converted. 
+ /// \brief Create a lifetime.end intrinsic. + /// + /// If the pointer isn't i8* it will be converted. CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = 0); private: Value *getCastedInt8PtrValue(Value *Ptr); }; -/// IRBuilder - This provides a uniform API for creating instructions and -/// inserting them into a basic block: either at the end of a BasicBlock, or -/// at a specific iterator location in a block. +/// \brief This provides a uniform API for creating instructions and inserting +/// them into a basic block: either at the end of a BasicBlock, or at a specific +/// iterator location in a block. /// /// Note that the builder does not expose the full generality of LLVM /// instructions. For access to extra instruction properties, use the mutators -/// (e.g. setVolatile) on the instructions after they have been created. +/// (e.g. setVolatile) on the instructions after they have been +/// created. Convenience state exists to specify fast-math flags and fp-math +/// tags. +/// /// The first template argument handles whether or not to preserve names in the /// final instruction output. This defaults to on. The second template argument /// specifies a class to use for creating constants. 
This defaults to creating @@ -341,36 +352,40 @@ templategetContext()), Folder(F), - DefaultFPMathTag(FPMathTag) { + DefaultFPMathTag(FPMathTag), FMF() { SetInsertPoint(TheBB); } explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = 0) : IRBuilderBase(TheBB->getContext()), Folder(), - DefaultFPMathTag(FPMathTag) { + DefaultFPMathTag(FPMathTag), FMF() { SetInsertPoint(TheBB); } explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = 0) - : IRBuilderBase(IP->getContext()), Folder(), DefaultFPMathTag(FPMathTag) { + : IRBuilderBase(IP->getContext()), Folder(), DefaultFPMathTag(FPMathTag), + FMF() { SetInsertPoint(IP); SetCurrentDebugLocation(IP->getDebugLoc()); } explicit IRBuilder(Use &U, MDNode *FPMathTag = 0) - : IRBuilderBase(U->getContext()), Folder(), DefaultFPMathTag(FPMathTag) { + : IRBuilderBase(U->getContext()), Folder(), DefaultFPMathTag(FPMathTag), + FMF() { SetInsertPoint(U); SetCurrentDebugLocation(cast(U.getUser())->getDebugLoc()); } @@ -378,39 +393,47 @@ public: IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F, MDNode *FPMathTag = 0) : IRBuilderBase(TheBB->getContext()), Folder(F), - DefaultFPMathTag(FPMathTag) { + DefaultFPMathTag(FPMathTag), FMF() { SetInsertPoint(TheBB, IP); } IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, MDNode *FPMathTag = 0) : IRBuilderBase(TheBB->getContext()), Folder(), - DefaultFPMathTag(FPMathTag) { + DefaultFPMathTag(FPMathTag), FMF() { SetInsertPoint(TheBB, IP); } - /// getFolder - Get the constant folder being used. + /// \brief Get the constant folder being used. const T &getFolder() { return Folder; } - /// getDefaultFPMathTag - Get the floating point math metadata being used. + /// \brief Get the floating point math metadata being used. MDNode *getDefaultFPMathTag() const { return DefaultFPMathTag; } - /// SetDefaultFPMathTag - Set the floating point math metadata to be used. 
+ /// \brief Get the flags to be applied to created floating point ops + FastMathFlags getFastMathFlags() const { return FMF; } + + /// \brief Clear the fast-math flags. + void clearFastMathFlags() { FMF.clear(); } + + /// \brief SetDefaultFPMathTag - Set the floating point math metadata to be used. void SetDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; } - /// isNamePreserving - Return true if this builder is configured to actually - /// add the requested names to IR created through it. + /// \brief Set the fast-math flags to be used with generated fp-math operators + void SetFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; } + + /// \brief Return true if this builder is configured to actually add the + /// requested names to IR created through it. bool isNamePreserving() const { return preserveNames; } - /// Insert - Insert and return the specified instruction. + /// \brief Insert and return the specified instruction. template InstTy *Insert(InstTy *I, const Twine &Name = "") const { this->InsertHelper(I, Name, BB, InsertPt); - if (!getCurrentDebugLocation().isUnknown()) - this->SetInstDebugLocation(I); + this->SetInstDebugLocation(I); return I; } - /// Insert - No-op overload to handle constants. + /// \brief No-op overload to handle constants. Constant *Insert(Constant *C, const Twine& = "") const { return C; } @@ -430,25 +453,23 @@ private: } public: - /// CreateRetVoid - Create a 'ret void' instruction. + /// \brief Create a 'ret void' instruction. ReturnInst *CreateRetVoid() { return Insert(ReturnInst::Create(Context)); } - /// @verbatim - /// CreateRet - Create a 'ret ' instruction. - /// @endverbatim + /// \brief Create a 'ret ' instruction. 
ReturnInst *CreateRet(Value *V) { return Insert(ReturnInst::Create(Context, V)); } - /// CreateAggregateRet - Create a sequence of N insertvalue instructions, + /// \brief Create a sequence of N insertvalue instructions, /// with one Value from the retVals array each, that build a aggregate /// return value one value at a time, and a ret instruction to return - /// the resulting aggregate value. This is a convenience function for - /// code that uses aggregate return values as a vehicle for having - /// multiple return values. + /// the resulting aggregate value. /// + /// This is a convenience function for code that uses aggregate return values + /// as a vehicle for having multiple return values. ReturnInst *CreateAggregateRet(Value *const *retVals, unsigned N) { Value *V = UndefValue::get(getCurrentFunctionReturnType()); for (unsigned i = 0; i != N; ++i) @@ -456,12 +477,12 @@ public: return Insert(ReturnInst::Create(Context, V)); } - /// CreateBr - Create an unconditional 'br label X' instruction. + /// \brief Create an unconditional 'br label X' instruction. BranchInst *CreateBr(BasicBlock *Dest) { return Insert(BranchInst::Create(Dest)); } - /// CreateCondBr - Create a conditional 'br Cond, TrueDest, FalseDest' + /// \brief Create a conditional 'br Cond, TrueDest, FalseDest' /// instruction. BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights = 0) { @@ -469,18 +490,18 @@ public: BranchWeights)); } - /// CreateSwitch - Create a switch instruction with the specified value, - /// default dest, and with a hint for the number of cases that will be added - /// (for efficient allocation). + /// \brief Create a switch instruction with the specified value, default dest, + /// and with a hint for the number of cases that will be added (for efficient + /// allocation). 
SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10, MDNode *BranchWeights = 0) { return Insert(addBranchWeights(SwitchInst::Create(V, Dest, NumCases), BranchWeights)); } - /// CreateIndirectBr - Create an indirect branch instruction with the - /// specified address operand, with an optional hint for the number of - /// destinations that will be added (for efficient allocation). + /// \brief Create an indirect branch instruction with the specified address + /// operand, with an optional hint for the number of destinations that will be + /// added (for efficient allocation). IndirectBrInst *CreateIndirectBr(Value *Addr, unsigned NumDests = 10) { return Insert(IndirectBrInst::Create(Addr, NumDests)); } @@ -505,7 +526,7 @@ public: return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args), Name); } - /// CreateInvoke - Create an invoke instruction. + /// \brief Create an invoke instruction. InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef Args, const Twine &Name = "") { @@ -535,11 +556,14 @@ private: return BO; } - Instruction *AddFPMathTag(Instruction *I, MDNode *FPMathTag) const { + Instruction *AddFPMathAttributes(Instruction *I, + MDNode *FPMathTag, + FastMathFlags FMF) const { if (!FPMathTag) FPMathTag = DefaultFPMathTag; if (FPMathTag) I->setMetadata(LLVMContext::MD_fpmath, FPMathTag); + I->setFastMathFlags(FMF); return I; } public: @@ -562,8 +586,8 @@ public: if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Insert(Folder.CreateFAdd(LC, RC), Name); - return Insert(AddFPMathTag(BinaryOperator::CreateFAdd(LHS, RHS), - FPMathTag), Name); + return Insert(AddFPMathAttributes(BinaryOperator::CreateFAdd(LHS, RHS), + FPMathTag, FMF), Name); } Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { @@ -584,8 +608,8 @@ public: if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return 
Insert(Folder.CreateFSub(LC, RC), Name); - return Insert(AddFPMathTag(BinaryOperator::CreateFSub(LHS, RHS), - FPMathTag), Name); + return Insert(AddFPMathAttributes(BinaryOperator::CreateFSub(LHS, RHS), + FPMathTag, FMF), Name); } Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { @@ -606,8 +630,8 @@ public: if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Insert(Folder.CreateFMul(LC, RC), Name); - return Insert(AddFPMathTag(BinaryOperator::CreateFMul(LHS, RHS), - FPMathTag), Name); + return Insert(AddFPMathAttributes(BinaryOperator::CreateFMul(LHS, RHS), + FPMathTag, FMF), Name); } Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "", bool isExact = false) { @@ -638,8 +662,8 @@ public: if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Insert(Folder.CreateFDiv(LC, RC), Name); - return Insert(AddFPMathTag(BinaryOperator::CreateFDiv(LHS, RHS), - FPMathTag), Name); + return Insert(AddFPMathAttributes(BinaryOperator::CreateFDiv(LHS, RHS), + FPMathTag, FMF), Name); } Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) @@ -658,8 +682,8 @@ public: if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Insert(Folder.CreateFRem(LC, RC), Name); - return Insert(AddFPMathTag(BinaryOperator::CreateFRem(LHS, RHS), - FPMathTag), Name); + return Insert(AddFPMathAttributes(BinaryOperator::CreateFRem(LHS, RHS), + FPMathTag, FMF), Name); } Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "", @@ -788,7 +812,8 @@ public: Value *CreateFNeg(Value *V, const Twine &Name = "", MDNode *FPMathTag = 0) { if (Constant *VC = dyn_cast(V)) return Insert(Folder.CreateFNeg(VC), Name); - return Insert(AddFPMathTag(BinaryOperator::CreateFNeg(V), FPMathTag), Name); + return Insert(AddFPMathAttributes(BinaryOperator::CreateFNeg(V), + FPMathTag, FMF), Name); } Value *CreateNot(Value *V, const Twine &Name 
= "") { if (Constant *VC = dyn_cast(V)) @@ -804,7 +829,7 @@ public: const Twine &Name = "") { return Insert(new AllocaInst(Ty, ArraySize), Name); } - // Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of + // \brief Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of // converting the string to 'bool' for the isVolatile parameter. LoadInst *CreateLoad(Value *Ptr, const char *Name) { return Insert(new LoadInst(Ptr), Name); @@ -818,8 +843,9 @@ public: StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) { return Insert(new StoreInst(Val, Ptr, isVolatile)); } - // Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' correctly, - // instead of converting the string to 'bool' for the isVolatile parameter. + // \brief Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' + // correctly, instead of converting the string to 'bool' for the isVolatile + // parameter. LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) { LoadInst *LI = CreateLoad(Ptr, Name); LI->setAlignment(Align); @@ -981,8 +1007,8 @@ public: return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name); } - /// CreateGlobalStringPtr - Same as CreateGlobalString, but return a pointer - /// with "i8*" type instead of a pointer to array of i8. + /// \brief Same as CreateGlobalString, but return a pointer with "i8*" type + /// instead of a pointer to array of i8. Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "") { Value *gv = CreateGlobalString(Str, Name); Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0); @@ -1003,27 +1029,31 @@ public: Value *CreateSExt(Value *V, Type *DestTy, const Twine &Name = "") { return CreateCast(Instruction::SExt, V, DestTy, Name); } - /// CreateZExtOrTrunc - Create a ZExt or Trunc from the integer value V to - /// DestTy. Return the value untouched if the type of V is already DestTy. 
- Value *CreateZExtOrTrunc(Value *V, IntegerType *DestTy, + /// \brief Create a ZExt or Trunc from the integer value V to DestTy. Return + /// the value untouched if the type of V is already DestTy. + Value *CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name = "") { - assert(isa(V->getType()) && "Can only zero extend integers!"); - IntegerType *IntTy = cast(V->getType()); - if (IntTy->getBitWidth() < DestTy->getBitWidth()) + assert(V->getType()->isIntOrIntVectorTy() && + DestTy->isIntOrIntVectorTy() && + "Can only zero extend/truncate integers!"); + Type *VTy = V->getType(); + if (VTy->getScalarSizeInBits() < DestTy->getScalarSizeInBits()) return CreateZExt(V, DestTy, Name); - if (IntTy->getBitWidth() > DestTy->getBitWidth()) + if (VTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits()) return CreateTrunc(V, DestTy, Name); return V; } - /// CreateSExtOrTrunc - Create a SExt or Trunc from the integer value V to - /// DestTy. Return the value untouched if the type of V is already DestTy. - Value *CreateSExtOrTrunc(Value *V, IntegerType *DestTy, + /// \brief Create a SExt or Trunc from the integer value V to DestTy. Return + /// the value untouched if the type of V is already DestTy. 
+ Value *CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name = "") { - assert(isa(V->getType()) && "Can only sign extend integers!"); - IntegerType *IntTy = cast(V->getType()); - if (IntTy->getBitWidth() < DestTy->getBitWidth()) + assert(V->getType()->isIntOrIntVectorTy() && + DestTy->isIntOrIntVectorTy() && + "Can only sign extend/truncate integers!"); + Type *VTy = V->getType(); + if (VTy->getScalarSizeInBits() < DestTy->getScalarSizeInBits()) return CreateSExt(V, DestTy, Name); - if (IntTy->getBitWidth() > DestTy->getBitWidth()) + if (VTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits()) return CreateTrunc(V, DestTy, Name); return V; } @@ -1107,8 +1137,9 @@ public: return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name); } private: - // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a compile time - // error, instead of converting the string to bool for the isSigned parameter. + // \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a + // compile time error, instead of converting the string to bool for the + // isSigned parameter. Value *CreateIntCast(Value *, Type *, const char *) LLVM_DELETED_FUNCTION; public: Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") { @@ -1311,30 +1342,31 @@ public: LandingPadInst *CreateLandingPad(Type *Ty, Value *PersFn, unsigned NumClauses, const Twine &Name = "") { - return Insert(LandingPadInst::Create(Ty, PersFn, NumClauses, Name)); + return Insert(LandingPadInst::Create(Ty, PersFn, NumClauses), Name); } //===--------------------------------------------------------------------===// // Utility creation methods //===--------------------------------------------------------------------===// - /// CreateIsNull - Return an i1 value testing if \p Arg is null. + /// \brief Return an i1 value testing if \p Arg is null. 
Value *CreateIsNull(Value *Arg, const Twine &Name = "") { return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()), Name); } - /// CreateIsNotNull - Return an i1 value testing if \p Arg is not null. + /// \brief Return an i1 value testing if \p Arg is not null. Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") { return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()), Name); } - /// CreatePtrDiff - Return the i64 difference between two pointer values, - /// dividing out the size of the pointed-to objects. This is intended to - /// implement C-style pointer subtraction. As such, the pointers must be - /// appropriately aligned for their element types and pointing into the - /// same object. + /// \brief Return the i64 difference between two pointer values, dividing out + /// the size of the pointed-to objects. + /// + /// This is intended to implement C-style pointer subtraction. As such, the + /// pointers must be appropriately aligned for their element types and + /// pointing into the same object. Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") { assert(LHS->getType() == RHS->getType() && "Pointer subtraction operand types must match!"); @@ -1346,6 +1378,22 @@ public: ConstantExpr::getSizeOf(ArgType->getElementType()), Name); } + + /// \brief Return a vector value that contains \arg V broadcasted to \p + /// NumElts elements. + Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") { + assert(NumElts > 0 && "Cannot splat to an empty vector!"); + + // First insert it into an undef vector so we can shuffle it. + Type *I32Ty = getInt32Ty(); + Value *Undef = UndefValue::get(VectorType::get(V->getType(), NumElts)); + V = CreateInsertElement(Undef, V, ConstantInt::get(I32Ty, 0), + Name + ".splatinsert"); + + // Shuffle the value across the desired number of elements. 
+ Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32Ty, NumElts)); + return CreateShuffleVector(V, Undef, Zeros, Name + ".splat"); + } }; } diff --git a/include/llvm/InlineAsm.h b/include/llvm/IR/InlineAsm.h similarity index 99% rename from include/llvm/InlineAsm.h rename to include/llvm/IR/InlineAsm.h index b5e0fd4effd6..33e4ab8522d1 100644 --- a/include/llvm/InlineAsm.h +++ b/include/llvm/IR/InlineAsm.h @@ -13,11 +13,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INLINEASM_H -#define LLVM_INLINEASM_H +#ifndef LLVM_IR_INLINEASM_H +#define LLVM_IR_INLINEASM_H -#include "llvm/Value.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Value.h" #include namespace llvm { diff --git a/include/llvm/InstrTypes.h b/include/llvm/IR/InstrTypes.h similarity index 99% rename from include/llvm/InstrTypes.h rename to include/llvm/IR/InstrTypes.h index da17f3b80d7b..3e6903cb52d7 100644 --- a/include/llvm/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -13,13 +13,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INSTRUCTION_TYPES_H -#define LLVM_INSTRUCTION_TYPES_H +#ifndef LLVM_IR_INSTRTYPES_H +#define LLVM_IR_INSTRTYPES_H -#include "llvm/Instruction.h" -#include "llvm/OperandTraits.h" -#include "llvm/DerivedTypes.h" #include "llvm/ADT/Twine.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/OperandTraits.h" namespace llvm { @@ -177,19 +177,19 @@ public: const Twine &Name = "") {\ return Create(Instruction::OPC, V1, V2, Name);\ } -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" #define HANDLE_BINARY_INST(N, OPC, CLASS) \ static BinaryOperator *Create##OPC(Value *V1, Value *V2, \ const Twine &Name, BasicBlock *BB) {\ return Create(Instruction::OPC, V1, V2, Name, BB);\ } -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" #define HANDLE_BINARY_INST(N, OPC, CLASS) \ static 
BinaryOperator *Create##OPC(Value *V1, Value *V2, \ const Twine &Name, Instruction *I) {\ return Create(Instruction::OPC, V1, V2, Name, I);\ } -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name = "") { @@ -309,7 +309,7 @@ public: /// NEG, FNeg, or NOT instruction. /// static bool isNeg(const Value *V); - static bool isFNeg(const Value *V); + static bool isFNeg(const Value *V, bool IgnoreZeroSign=false); static bool isNot(const Value *V); /// getNegArgument, getNotArgument - Helper functions to extract the diff --git a/include/llvm/Instruction.def b/include/llvm/IR/Instruction.def similarity index 100% rename from include/llvm/Instruction.def rename to include/llvm/IR/Instruction.def diff --git a/include/llvm/Instruction.h b/include/llvm/IR/Instruction.h similarity index 84% rename from include/llvm/Instruction.h rename to include/llvm/IR/Instruction.h index 8aa8a56bf825..5721d8f2f3fb 100644 --- a/include/llvm/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -12,15 +12,16 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INSTRUCTION_H -#define LLVM_INSTRUCTION_H +#ifndef LLVM_IR_INSTRUCTION_H +#define LLVM_IR_INSTRUCTION_H -#include "llvm/User.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/IR/User.h" #include "llvm/Support/DebugLoc.h" namespace llvm { +class FastMathFlags; class LLVMContext; class MDNode; @@ -33,7 +34,7 @@ class Instruction : public User, public ilist_node { BasicBlock *Parent; DebugLoc DbgLoc; // 'dbg' Metadata cache. - + enum { /// HasMetadataBit - This is a bit stored in the SubClassData field which /// indicates whether this instruction has metadata attached to it or not. @@ -42,12 +43,12 @@ class Instruction : public User, public ilist_node { public: // Out of line virtual method, so the vtable, etc has a home. 
~Instruction(); - + /// use_back - Specialize the methods defined in Value, as we know that an /// instruction can only be used by other instructions. Instruction *use_back() { return cast(*use_begin());} const Instruction *use_back() const { return cast(*use_begin());} - + inline const BasicBlock *getParent() const { return Parent; } inline BasicBlock *getParent() { return Parent; } @@ -77,16 +78,16 @@ public: //===--------------------------------------------------------------------===// // Subclass classification. //===--------------------------------------------------------------------===// - + /// getOpcode() returns a member of one of the enums like Instruction::Add. unsigned getOpcode() const { return getValueID() - InstructionVal; } - + const char *getOpcodeName() const { return getOpcodeName(getOpcode()); } bool isTerminator() const { return isTerminator(getOpcode()); } bool isBinaryOp() const { return isBinaryOp(getOpcode()); } bool isShift() { return isShift(getOpcode()); } bool isCast() const { return isCast(getOpcode()); } - + static const char* getOpcodeName(unsigned OpCode); static inline bool isTerminator(unsigned OpCode) { @@ -121,33 +122,33 @@ public: //===--------------------------------------------------------------------===// // Metadata manipulation. //===--------------------------------------------------------------------===// - + /// hasMetadata() - Return true if this instruction has any metadata attached /// to it. bool hasMetadata() const { return !DbgLoc.isUnknown() || hasMetadataHashEntry(); } - + /// hasMetadataOtherThanDebugLoc - Return true if this instruction has /// metadata attached to it other than a debug location. bool hasMetadataOtherThanDebugLoc() const { return hasMetadataHashEntry(); } - + /// getMetadata - Get the metadata of given kind attached to this Instruction. /// If the metadata is not found then return null. 
MDNode *getMetadata(unsigned KindID) const { if (!hasMetadata()) return 0; return getMetadataImpl(KindID); } - + /// getMetadata - Get the metadata of given kind attached to this Instruction. /// If the metadata is not found then return null. MDNode *getMetadata(StringRef Kind) const { if (!hasMetadata()) return 0; return getMetadataImpl(Kind); } - + /// getAllMetadata - Get all metadata attached to this Instruction. The first /// element of each pair returned is the KindID, the second element is the /// metadata value. This list is returned sorted by the KindID. @@ -155,7 +156,7 @@ public: if (hasMetadata()) getAllMetadataImpl(MDs); } - + /// getAllMetadataOtherThanDebugLoc - This does the same thing as /// getAllMetadata, except that it filters out the debug location. void getAllMetadataOtherThanDebugLoc(SmallVectorImplgetValueID() >= Value::InstructionVal; @@ -321,35 +381,35 @@ public: #define FIRST_TERM_INST(N) TermOpsBegin = N, #define HANDLE_TERM_INST(N, OPC, CLASS) OPC = N, #define LAST_TERM_INST(N) TermOpsEnd = N+1 -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" }; enum BinaryOps { #define FIRST_BINARY_INST(N) BinaryOpsBegin = N, #define HANDLE_BINARY_INST(N, OPC, CLASS) OPC = N, #define LAST_BINARY_INST(N) BinaryOpsEnd = N+1 -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" }; enum MemoryOps { #define FIRST_MEMORY_INST(N) MemoryOpsBegin = N, #define HANDLE_MEMORY_INST(N, OPC, CLASS) OPC = N, #define LAST_MEMORY_INST(N) MemoryOpsEnd = N+1 -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" }; enum CastOps { #define FIRST_CAST_INST(N) CastOpsBegin = N, #define HANDLE_CAST_INST(N, OPC, CLASS) OPC = N, #define LAST_CAST_INST(N) CastOpsEnd = N+1 -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" }; enum OtherOps { #define FIRST_OTHER_INST(N) OtherOpsBegin = N, #define HANDLE_OTHER_INST(N, OPC, CLASS) OPC = N, #define LAST_OTHER_INST(N) OtherOpsEnd = N+1 -#include "llvm/Instruction.def" 
+#include "llvm/IR/Instruction.def" }; private: // Shadow Value::setValueSubclassData with a private forwarding method so that @@ -360,34 +420,34 @@ private: unsigned short getSubclassDataFromValue() const { return Value::getSubclassDataFromValue(); } - + void setHasMetadataHashEntry(bool V) { setValueSubclassData((getSubclassDataFromValue() & ~HasMetadataBit) | (V ? HasMetadataBit : 0)); } - + friend class SymbolTableListTraits; void setParent(BasicBlock *P); protected: // Instruction subclasses can stick up to 15 bits of stuff into the // SubclassData field of instruction with these members. - + // Verify that only the low 15 bits are used. void setInstructionSubclassData(unsigned short D) { assert((D & HasMetadataBit) == 0 && "Out of range value put into field"); setValueSubclassData((getSubclassDataFromValue() & HasMetadataBit) | D); } - + unsigned getSubclassDataFromInstruction() const { return getSubclassDataFromValue() & ~HasMetadataBit; } - + Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps, Instruction *InsertBefore = 0); Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd); virtual Instruction *clone_impl() const = 0; - + }; // Instruction* is only 4-byte aligned. 
@@ -401,7 +461,7 @@ public: } enum { NumLowBitsAvailable = 2 }; }; - + } // End llvm namespace #endif diff --git a/include/llvm/Instructions.h b/include/llvm/IR/Instructions.h similarity index 96% rename from include/llvm/Instructions.h rename to include/llvm/IR/Instructions.h index 69593b48c1f1..7e29699f73d9 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -13,25 +13,26 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INSTRUCTIONS_H -#define LLVM_INSTRUCTIONS_H +#ifndef LLVM_IR_INSTRUCTIONS_H +#define LLVM_IR_INSTRUCTIONS_H -#include "llvm/InstrTypes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Attributes.h" -#include "llvm/CallingConv.h" -#include "llvm/Support/IntegersSubset.h" -#include "llvm/Support/IntegersSubsetMapping.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IntegersSubset.h" +#include "llvm/Support/IntegersSubsetMapping.h" #include namespace llvm { +class APInt; class ConstantInt; class ConstantRange; -class APInt; +class DataLayout; class LLVMContext; enum AtomicOrdering { @@ -90,7 +91,7 @@ public: /// getType - Overload to return most specific pointer type /// PointerType *getType() const { - return reinterpret_cast(Instruction::getType()); + return cast(Instruction::getType()); } /// getAllocatedType - Return the type that is being allocated by the @@ -280,7 +281,7 @@ public: unsigned Align, AtomicOrdering Order, SynchronizationScope SynchScope, BasicBlock *InsertAtEnd); - + /// isVolatile - Return true if this is a store to a volatile memory /// location. 
@@ -515,15 +516,15 @@ public: Value *getCompareOperand() { return getOperand(1); } const Value *getCompareOperand() const { return getOperand(1); } - + Value *getNewValOperand() { return getOperand(2); } const Value *getNewValOperand() const { return getOperand(2); } - + /// \brief Returns the address space of the pointer operand. unsigned getPointerAddressSpace() const { return getPointerOperand()->getType()->getPointerAddressSpace(); } - + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::AtomicCmpXchg; @@ -761,9 +762,9 @@ public: /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - // getType - Overload to return most specific pointer type... - PointerType *getType() const { - return reinterpret_cast(Instruction::getType()); + // getType - Overload to return most specific sequential type. + SequentialType *getType() const { + return cast(Instruction::getType()); } /// \brief Returns the address space of this instruction's pointer type. @@ -850,6 +851,16 @@ public: /// isInBounds - Determine whether the GEP has the inbounds flag. bool isInBounds() const; + /// \brief Accumulate the constant address offset of this GEP if possible. + /// + /// This routine accepts an APInt into which it will accumulate the constant + /// offset of this GEP if the GEP is in fact constant. If the GEP is not + /// all-constant, it returns false and the value of the offset APInt is + /// undefined (it is *not* preserved!). The APInt passed into this routine + /// must be at least as wide as the IntPtr type for the address space of + /// the base GEP pointer. 
+ bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const; + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::GetElementPtr); @@ -942,7 +953,7 @@ public: "Both operands to ICmp instruction are not of the same type!"); // Check that the operands are the right type assert((getOperand(0)->getType()->isIntOrIntVectorTy() || - getOperand(0)->getType()->isPointerTy()) && + getOperand(0)->getType()->getScalarType()->isPointerTy()) && "Invalid operand types for ICmp instruction"); } @@ -1156,7 +1167,7 @@ public: /// hold the calling convention of the call. /// class CallInst : public Instruction { - AttrListPtr AttributeList; ///< parameter attributes for call + AttributeSet AttributeList; ///< parameter attributes for call CallInst(const CallInst &CI); void init(Value *Func, ArrayRef Args, const Twine &NameStr); void init(Value *Func, const Twine &NameStr); @@ -1254,23 +1265,23 @@ public: /// getAttributes - Return the parameter attributes for this call. /// - const AttrListPtr &getAttributes() const { return AttributeList; } + const AttributeSet &getAttributes() const { return AttributeList; } /// setAttributes - Set the parameter attributes for this call. /// - void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; } + void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; } /// addAttribute - adds the attribute to the list of attributes. - void addAttribute(unsigned i, Attributes attr); + void addAttribute(unsigned i, Attribute::AttrKind attr); /// removeAttribute - removes the attribute from the list of attributes. - void removeAttribute(unsigned i, Attributes attr); + void removeAttribute(unsigned i, Attribute attr); /// \brief Determine whether this call has the given attribute. 
- bool hasFnAttr(Attributes::AttrVal A) const; + bool hasFnAttr(Attribute::AttrKind A) const; /// \brief Determine whether the call or the callee has the given attributes. - bool paramHasAttr(unsigned i, Attributes::AttrVal A) const; + bool paramHasAttr(unsigned i, Attribute::AttrKind A) const; /// \brief Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned i) const { @@ -1278,66 +1289,63 @@ public: } /// \brief Return true if the call should not be inlined. - bool isNoInline() const { return hasFnAttr(Attributes::NoInline); } + bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::NoInline)); + addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline); } /// \brief Return true if the call can return twice bool canReturnTwice() const { - return hasFnAttr(Attributes::ReturnsTwice); + return hasFnAttr(Attribute::ReturnsTwice); } void setCanReturnTwice() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::ReturnsTwice)); + addAttribute(AttributeSet::FunctionIndex, Attribute::ReturnsTwice); } /// \brief Determine if the call does not access memory. bool doesNotAccessMemory() const { - return hasFnAttr(Attributes::ReadNone); + return hasFnAttr(Attribute::ReadNone); } void setDoesNotAccessMemory() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::ReadNone)); + addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); } /// \brief Determine if the call does not access or only reads memory. 
bool onlyReadsMemory() const { - return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly); + return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); } void setOnlyReadsMemory() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::ReadOnly)); + addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly); } /// \brief Determine if the call cannot return. - bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); } + bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::NoReturn)); + addAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn); } /// \brief Determine if the call cannot unwind. - bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); } + bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); } void setDoesNotThrow() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::NoUnwind)); + addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); + } + + /// \brief Determine if the call cannot be duplicated. + bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); } + void setCannotDuplicate() { + addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate); } /// \brief Determine if the call returns a structure through first /// pointer argument. bool hasStructRetAttr() const { // Be friendly and also check the callee. - return paramHasAttr(1, Attributes::StructRet); + return paramHasAttr(1, Attribute::StructRet); } /// \brief Determine if any call argument is an aggregate passed by value. 
bool hasByValArgument() const { - for (unsigned I = 0, E = AttributeList.getNumAttrs(); I != E; ++I) - if (AttributeList.getAttributesAtIndex(I).hasAttribute(Attributes::ByVal)) - return true; - return false; + return AttributeList.hasAttrSomewhere(Attribute::ByVal); } /// getCalledFunction - Return the function called, or null if this is an @@ -1555,7 +1563,7 @@ public: const Value *getIndexOperand() const { return Op<1>(); } VectorType *getVectorOperandType() const { - return reinterpret_cast(getVectorOperand()->getType()); + return cast(getVectorOperand()->getType()); } @@ -1614,7 +1622,7 @@ public: /// getType - Overload to return most specific vector type. /// VectorType *getType() const { - return reinterpret_cast(Instruction::getType()); + return cast(Instruction::getType()); } /// Transparently provide more efficient getOperand methods. @@ -1666,16 +1674,16 @@ public: /// getType - Overload to return most specific vector type. /// VectorType *getType() const { - return reinterpret_cast(Instruction::getType()); + return cast(Instruction::getType()); } /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); Constant *getMask() const { - return reinterpret_cast(getOperand(2)); + return cast(getOperand(2)); } - + /// getMaskValue - Return the index from the shuffle mask for the specified /// output result. This is either -1 if the element is undef or a number less /// than 2*numelements. @@ -1684,7 +1692,7 @@ public: int getMaskValue(unsigned i) const { return getMaskValue(getMask(), i); } - + /// getShuffleMask - Return the full mask for this instruction, where each /// element is the element number and undef's are returned as -1. 
static void getShuffleMask(Constant *Mask, SmallVectorImpl &Result); @@ -2001,7 +2009,7 @@ public: Instruction *InsertBefore = 0) { return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore); } - static PHINode *Create(Type *Ty, unsigned NumReservedValues, + static PHINode *Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock *InsertAtEnd) { return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd); } @@ -2440,7 +2448,7 @@ class SwitchInst : public TerminatorInst { // Operand[1] = Default basic block destination // Operand[2n ] = Value to match // Operand[2n+1] = BasicBlock to go to on match - + // Store case values separately from operands list. We needn't User-Use // concept here, since it is just a case value, it will always constant, // and case value couldn't reused with another instructions/values. @@ -2457,9 +2465,9 @@ class SwitchInst : public TerminatorInst { typedef std::list Subsets; typedef Subsets::iterator SubsetsIt; typedef Subsets::const_iterator SubsetsConstIt; - + Subsets TheSubsets; - + SwitchInst(const SwitchInst &SI); void init(Value *Value, BasicBlock *Default, unsigned NumReserved); void growOperands(); @@ -2483,7 +2491,7 @@ class SwitchInst : public TerminatorInst { protected: virtual SwitchInst *clone_impl() const; public: - + // FIXME: Currently there are a lot of unclean template parameters, // we need to make refactoring in future. 
// All these parameters are used to implement both iterator and const_iterator @@ -2493,16 +2501,16 @@ public: // SubsetsItTy may be SubsetsConstIt or SubsetsIt // BasicBlockTy may be "const BasicBlock" or "BasicBlock" template + class SubsetsItTy, class BasicBlockTy> class CaseIteratorT; typedef CaseIteratorT ConstCaseIt; class CaseIt; - + // -2 static const unsigned DefaultPseudoIndex = static_cast(~0L-1); - + static SwitchInst *Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore = 0) { return new SwitchInst(Value, Default, NumCases, InsertBefore); @@ -2511,7 +2519,7 @@ public: unsigned NumCases, BasicBlock *InsertAtEnd) { return new SwitchInst(Value, Default, NumCases, InsertAtEnd); } - + ~SwitchInst(); /// Provide fast operand accessors @@ -2545,7 +2553,7 @@ public: ConstCaseIt case_begin() const { return ConstCaseIt(this, 0, TheSubsets.begin()); } - + /// Returns a read/write iterator that points one past the last /// in the SwitchInst. CaseIt case_end() { @@ -2560,14 +2568,14 @@ public: /// Note: this iterator allows to resolve successor only. Attempt /// to resolve case value causes an assertion. /// Also note, that increment and decrement also causes an assertion and - /// makes iterator invalid. + /// makes iterator invalid. CaseIt case_default() { return CaseIt(this, DefaultPseudoIndex, TheSubsets.end()); } ConstCaseIt case_default() const { return ConstCaseIt(this, DefaultPseudoIndex, TheSubsets.end()); } - + /// findCaseValue - Search all of the case values for the specified constant. /// If it is explicitly handled, return the case iterator of it, otherwise /// return default case iterator to indicate @@ -2583,8 +2591,8 @@ public: if (i.getCaseValueEx().isSatisfies(IntItem::fromConstantInt(C))) return i; return case_default(); - } - + } + /// findCaseDest - Finds the unique case value for a given successor. Returns /// null if the successor is not found, not unique, or is the default case. 
ConstantInt *findCaseDest(BasicBlock *BB) { @@ -2606,7 +2614,7 @@ public: /// This action invalidates case_end(). Old case_end() iterator will /// point to the added case. void addCase(ConstantInt *OnVal, BasicBlock *Dest); - + /// addCase - Add an entry to the switch instruction. /// Note: /// This action invalidates case_end(). Old case_end() iterator will @@ -2630,31 +2638,31 @@ public: assert(idx < getNumSuccessors() && "Successor # out of range for switch!"); setOperand(idx*2+1, (Value*)NewSucc); } - + uint16_t hash() const { uint32_t NumberOfCases = (uint32_t)getNumCases(); uint16_t Hash = (0xFFFF & NumberOfCases) ^ (NumberOfCases >> 16); for (ConstCaseIt i = case_begin(), e = case_end(); i != e; ++i) { - uint32_t NumItems = (uint32_t)i.getCaseValueEx().getNumItems(); + uint32_t NumItems = (uint32_t)i.getCaseValueEx().getNumItems(); Hash = (Hash << 1) ^ (0xFFFF & NumItems) ^ (NumItems >> 16); } return Hash; - } - + } + // Case iterators definition. template + class SubsetsItTy, class BasicBlockTy> class CaseIteratorT { protected: - + SwitchInstTy *SI; - unsigned long Index; + unsigned Index; SubsetsItTy SubsetIt; - + /// Initializes case iterator for given SwitchInst and for given - /// case number. + /// case number. friend class SwitchInst; CaseIteratorT(SwitchInstTy *SI, unsigned SuccessorIndex, SubsetsItTy CaseValueIt) { @@ -2662,36 +2670,36 @@ public: Index = SuccessorIndex; this->SubsetIt = CaseValueIt; } - + public: typedef typename SubsetsItTy::reference IntegersSubsetRef; typedef CaseIteratorT Self; - + CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) { this->SI = SI; Index = CaseNum; SubsetIt = SI->TheSubsets.begin(); std::advance(SubsetIt, CaseNum); } - - + + /// Initializes case iterator for given SwitchInst and for given /// TerminatorInst's successor index. 
static Self fromSuccessorIndex(SwitchInstTy *SI, unsigned SuccessorIndex) { assert(SuccessorIndex < SI->getNumSuccessors() && - "Successor index # out of range!"); - return SuccessorIndex != 0 ? + "Successor index # out of range!"); + return SuccessorIndex != 0 ? Self(SI, SuccessorIndex - 1) : - Self(SI, DefaultPseudoIndex); + Self(SI, DefaultPseudoIndex); } - + /// Resolves case value for current case. /// @deprecated ConstantIntTy *getCaseValue() { assert(Index < SI->getNumCases() && "Index out the number of cases."); IntegersSubsetRef CaseRanges = *SubsetIt; - + // FIXME: Currently we work with ConstantInt based cases. // So return CaseValue as ConstantInt. return CaseRanges.getSingleNumber(0).toConstantInt(); @@ -2702,25 +2710,25 @@ public: assert(Index < SI->getNumCases() && "Index out the number of cases."); return *SubsetIt; } - + /// Resolves successor for current case. BasicBlockTy *getCaseSuccessor() { assert((Index < SI->getNumCases() || Index == DefaultPseudoIndex) && "Index out the number of cases."); - return SI->getSuccessor(getSuccessorIndex()); + return SI->getSuccessor(getSuccessorIndex()); } - + /// Returns number of current case. unsigned getCaseIndex() const { return Index; } - + /// Returns TerminatorInst's successor index for current case successor. unsigned getSuccessorIndex() const { assert((Index == DefaultPseudoIndex || Index < SI->getNumCases()) && "Index out the number of cases."); return Index != DefaultPseudoIndex ? Index + 1 : 0; } - + Self operator++() { // Check index correctness after increment. // Note: Index == getNumCases() means end(). @@ -2737,7 +2745,7 @@ public: ++(*this); return tmp; } - Self operator--() { + Self operator--() { // Check index correctness after decrement. // Note: Index == getNumCases() means end(). // Also allow "-1" iterator here. That will became valid after ++. 
@@ -2749,10 +2757,10 @@ public: SubsetIt = SI->TheSubsets.end(); return *this; } - - if (Index != -1UL) + + if (Index != -1U) --SubsetIt; - + return *this; } Self operator--(int) { @@ -2774,23 +2782,23 @@ public: SubsetsIt, BasicBlock> { typedef CaseIteratorT ParentTy; - + protected: friend class SwitchInst; CaseIt(SwitchInst *SI, unsigned CaseNum, SubsetsIt SubsetIt) : ParentTy(SI, CaseNum, SubsetIt) {} - + void updateCaseValueOperand(IntegersSubset& V) { - SI->setOperand(2 + Index*2, reinterpret_cast((Constant*)V)); + SI->setOperand(2 + Index*2, reinterpret_cast((Constant*)V)); } - + public: - CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {} - + CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {} + CaseIt(const ParentTy& Src) : ParentTy(Src) {} - /// Sets the new value for current case. + /// Sets the new value for current case. /// @deprecated. void setValue(ConstantInt *V) { assert(Index < SI->getNumCases() && "Index out the number of cases."); @@ -2801,17 +2809,17 @@ public: *SubsetIt = Mapping.getCase(); updateCaseValueOperand(*SubsetIt); } - + /// Sets the new value for current case. void setValueEx(IntegersSubset& V) { assert(Index < SI->getNumCases() && "Index out the number of cases."); *SubsetIt = V; - updateCaseValueOperand(*SubsetIt); + updateCaseValueOperand(*SubsetIt); } - + /// Sets the new successor for current case. void setSuccessor(BasicBlock *S) { - SI->setSuccessor(getSuccessorIndex(), S); + SI->setSuccessor(getSuccessorIndex(), S); } }; @@ -2942,7 +2950,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value) /// calling convention of the call. /// class InvokeInst : public TerminatorInst { - AttrListPtr AttributeList; + AttributeSet AttributeList; InvokeInst(const InvokeInst &BI); void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, const Twine &NameStr); @@ -3003,23 +3011,23 @@ public: /// getAttributes - Return the parameter attributes for this invoke. 
/// - const AttrListPtr &getAttributes() const { return AttributeList; } + const AttributeSet &getAttributes() const { return AttributeList; } /// setAttributes - Set the parameter attributes for this invoke. /// - void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; } + void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; } /// addAttribute - adds the attribute to the list of attributes. - void addAttribute(unsigned i, Attributes attr); + void addAttribute(unsigned i, Attribute::AttrKind attr); /// removeAttribute - removes the attribute from the list of attributes. - void removeAttribute(unsigned i, Attributes attr); + void removeAttribute(unsigned i, Attribute attr); /// \brief Determine whether this call has the NoAlias attribute. - bool hasFnAttr(Attributes::AttrVal A) const; + bool hasFnAttr(Attribute::AttrKind A) const; /// \brief Determine whether the call or the callee has the given attributes. - bool paramHasAttr(unsigned i, Attributes::AttrVal A) const; + bool paramHasAttr(unsigned i, Attribute::AttrKind A) const; /// \brief Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned i) const { @@ -3027,57 +3035,49 @@ public: } /// \brief Return true if the call should not be inlined. - bool isNoInline() const { return hasFnAttr(Attributes::NoInline); } + bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::NoInline)); + addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline); } /// \brief Determine if the call does not access memory. 
bool doesNotAccessMemory() const { - return hasFnAttr(Attributes::ReadNone); + return hasFnAttr(Attribute::ReadNone); } void setDoesNotAccessMemory() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::ReadNone)); + addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); } /// \brief Determine if the call does not access or only reads memory. bool onlyReadsMemory() const { - return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly); + return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); } void setOnlyReadsMemory() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::ReadOnly)); + addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly); } /// \brief Determine if the call cannot return. - bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); } + bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::NoReturn)); + addAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn); } /// \brief Determine if the call cannot unwind. - bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); } + bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); } void setDoesNotThrow() { - addAttribute(AttrListPtr::FunctionIndex, - Attributes::get(getContext(), Attributes::NoUnwind)); + addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); } /// \brief Determine if the call returns a structure through first /// pointer argument. bool hasStructRetAttr() const { // Be friendly and also check the callee. - return paramHasAttr(1, Attributes::StructRet); + return paramHasAttr(1, Attribute::StructRet); } /// \brief Determine if any call argument is an aggregate passed by value. 
bool hasByValArgument() const { - for (unsigned I = 0, E = AttributeList.getNumAttrs(); I != E; ++I) - if (AttributeList.getAttributesAtIndex(I).hasAttribute(Attributes::ByVal)) - return true; - return false; + return AttributeList.hasAttrSomewhere(Attribute::ByVal); } /// getCalledFunction - Return the function called, or null if this is an diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h similarity index 97% rename from include/llvm/IntrinsicInst.h rename to include/llvm/IR/IntrinsicInst.h index 9b2afd56e05f..8344c56680aa 100644 --- a/include/llvm/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -21,13 +21,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INTRINSICINST_H -#define LLVM_INTRINSICINST_H +#ifndef LLVM_IR_INTRINSICINST_H +#define LLVM_IR_INTRINSICINST_H -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" namespace llvm { /// IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic @@ -47,14 +47,14 @@ namespace llvm { // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const CallInst *I) { if (const Function *CF = I->getCalledFunction()) - return CF->getIntrinsicID() != 0; + return CF->isIntrinsic(); return false; } static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; - + /// DbgInfoIntrinsic - This is the common base class for debug info intrinsics /// class DbgInfoIntrinsic : public IntrinsicInst { diff --git a/include/llvm/Intrinsics.h b/include/llvm/IR/Intrinsics.h similarity index 94% rename from include/llvm/Intrinsics.h rename to include/llvm/IR/Intrinsics.h index 3108a8e5251c..c97cd91d73a9 100644 --- a/include/llvm/Intrinsics.h +++ b/include/llvm/IR/Intrinsics.h @@ -13,8 
+13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_INTRINSICS_H -#define LLVM_INTRINSICS_H +#ifndef LLVM_IR_INTRINSICS_H +#define LLVM_IR_INTRINSICS_H #include "llvm/ADT/ArrayRef.h" #include @@ -26,7 +26,7 @@ class FunctionType; class Function; class LLVMContext; class Module; -class AttrListPtr; +class AttributeSet; /// Intrinsic Namespace - This namespace contains an enum with a value for /// every intrinsic/builtin function known by LLVM. These enum values are @@ -38,7 +38,7 @@ namespace Intrinsic { // Get the intrinsic enums generated from Intrinsics.td #define GET_INTRINSIC_ENUM_VALUES -#include "llvm/Intrinsics.gen" +#include "llvm/IR/Intrinsics.gen" #undef GET_INTRINSIC_ENUM_VALUES , num_intrinsics }; @@ -58,7 +58,7 @@ namespace Intrinsic { /// Intrinsic::getAttributes(ID) - Return the attributes for an intrinsic. /// - AttrListPtr getAttributes(LLVMContext &C, ID id); + AttributeSet getAttributes(LLVMContext &C, ID id); /// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function /// declaration for an intrinsic, and return it. @@ -79,7 +79,7 @@ namespace Intrinsic { /// getIntrinsicInfoTableEntries. 
struct IITDescriptor { enum IITDescriptorKind { - Void, MMX, Metadata, Float, Double, + Void, MMX, Metadata, Half, Float, Double, Integer, Vector, Pointer, Struct, Argument, ExtendVecArgument, TruncVecArgument } Kind; diff --git a/include/llvm/Intrinsics.td b/include/llvm/IR/Intrinsics.td similarity index 95% rename from include/llvm/Intrinsics.td rename to include/llvm/IR/Intrinsics.td index 2e1597fe6f6b..e252664e45cf 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -106,6 +106,7 @@ def llvm_i8_ty : LLVMType; def llvm_i16_ty : LLVMType; def llvm_i32_ty : LLVMType; def llvm_i64_ty : LLVMType; +def llvm_half_ty : LLVMType; def llvm_float_ty : LLVMType; def llvm_double_ty : LLVMType; def llvm_f80_ty : LLVMType; @@ -125,16 +126,22 @@ def llvm_v2i1_ty : LLVMType; // 2 x i1 def llvm_v4i1_ty : LLVMType; // 4 x i1 def llvm_v8i1_ty : LLVMType; // 8 x i1 def llvm_v16i1_ty : LLVMType; // 16 x i1 +def llvm_v32i1_ty : LLVMType; // 32 x i1 +def llvm_v64i1_ty : LLVMType; // 64 x i1 def llvm_v2i8_ty : LLVMType; // 2 x i8 def llvm_v4i8_ty : LLVMType; // 4 x i8 def llvm_v8i8_ty : LLVMType; // 8 x i8 def llvm_v16i8_ty : LLVMType; // 16 x i8 def llvm_v32i8_ty : LLVMType; // 32 x i8 +def llvm_v64i8_ty : LLVMType; // 64 x i8 + def llvm_v1i16_ty : LLVMType; // 1 x i16 def llvm_v2i16_ty : LLVMType; // 2 x i16 def llvm_v4i16_ty : LLVMType; // 4 x i16 def llvm_v8i16_ty : LLVMType; // 8 x i16 def llvm_v16i16_ty : LLVMType; // 16 x i16 +def llvm_v32i16_ty : LLVMType; // 32 x i16 + def llvm_v1i32_ty : LLVMType; // 1 x i32 def llvm_v2i32_ty : LLVMType; // 2 x i32 def llvm_v4i32_ty : LLVMType; // 4 x i32 @@ -149,8 +156,10 @@ def llvm_v16i64_ty : LLVMType; // 16 x i64 def llvm_v2f32_ty : LLVMType; // 2 x float def llvm_v4f32_ty : LLVMType; // 4 x float def llvm_v8f32_ty : LLVMType; // 8 x float +def llvm_v16f32_ty : LLVMType; // 16 x float def llvm_v2f64_ty : LLVMType; // 2 x double def llvm_v4f64_ty : LLVMType; // 4 x double +def llvm_v8f64_ty : LLVMType; // 8 x 
double def llvm_vararg_ty : LLVMType; // this means vararg here @@ -271,6 +280,10 @@ let Properties = [IntrReadMem] in { def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_ceil : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_rint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; } let Properties = [IntrNoMem] in { @@ -461,11 +474,11 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty], // Target-specific intrinsics //===----------------------------------------------------------------------===// -include "llvm/IntrinsicsPowerPC.td" -include "llvm/IntrinsicsX86.td" -include "llvm/IntrinsicsARM.td" -include "llvm/IntrinsicsCellSPU.td" -include "llvm/IntrinsicsXCore.td" -include "llvm/IntrinsicsHexagon.td" -include "llvm/IntrinsicsNVVM.td" -include "llvm/IntrinsicsMips.td" +include "llvm/IR/IntrinsicsPowerPC.td" +include "llvm/IR/IntrinsicsX86.td" +include "llvm/IR/IntrinsicsARM.td" +include "llvm/IR/IntrinsicsXCore.td" +include "llvm/IR/IntrinsicsHexagon.td" +include "llvm/IR/IntrinsicsNVVM.td" +include "llvm/IR/IntrinsicsMips.td" +include "llvm/IR/IntrinsicsR600.td" diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td similarity index 100% rename from include/llvm/IntrinsicsARM.td rename to include/llvm/IR/IntrinsicsARM.td diff --git a/include/llvm/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td similarity index 100% rename from include/llvm/IntrinsicsHexagon.td rename to include/llvm/IR/IntrinsicsHexagon.td diff --git a/include/llvm/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td similarity index 100% rename from include/llvm/IntrinsicsMips.td rename to include/llvm/IR/IntrinsicsMips.td diff --git 
a/include/llvm/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td similarity index 97% rename from include/llvm/IntrinsicsNVVM.td rename to include/llvm/IR/IntrinsicsNVVM.td index 1853c9988b47..ebfd03e48492 100644 --- a/include/llvm/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -805,6 +805,16 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [LLVMPointerType>], [IntrReadMem, NoCapture<0>], "llvm.nvvm.ldu.global.p">; +// Generated within nvvm. Use for ldg on sm_35 or later +def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty], + [LLVMPointerType>], [IntrReadMem, NoCapture<0>], + "llvm.nvvm.ldg.global.i">; +def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty], + [LLVMPointerType>], [IntrReadMem, NoCapture<0>], + "llvm.nvvm.ldg.global.f">; +def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], + [LLVMPointerType>], [IntrReadMem, NoCapture<0>], + "llvm.nvvm.ldg.global.p">; // Use for generic pointers // - These intrinsics are used to convert address spaces. @@ -815,36 +825,36 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], // of pointer to another type of pointer, while the address space remains // the same. 
def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.local.to.gen">; def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.shared.to.gen">; def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.global.to.gen">; def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.constant.to.gen">; def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.gen.to.global">; def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.gen.to.shared">; def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.gen.to.local">; def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], + [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.ptr.gen.to.constant">; // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. 
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty], - [IntrNoMem, NoCapture<0>], + [IntrNoMem], "llvm.nvvm.ptr.gen.to.param">; // Move intrinsics, used in nvvm internally diff --git a/include/llvm/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td similarity index 99% rename from include/llvm/IntrinsicsPowerPC.td rename to include/llvm/IR/IntrinsicsPowerPC.td index da85bfba8631..cde39ccd3c52 100644 --- a/include/llvm/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -22,7 +22,8 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], []>; + def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], + [IntrReadWriteArgMem, NoCapture<0>]>; def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>; diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td new file mode 100644 index 000000000000..ecb5668d8e95 --- /dev/null +++ b/include/llvm/IR/IntrinsicsR600.td @@ -0,0 +1,36 @@ +//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the R600-specific intrinsics. 
+// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "r600" in { + +class R600ReadPreloadRegisterIntrinsic + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + +multiclass R600ReadPreloadRegisterIntrinsic_xyz { + def _x : R600ReadPreloadRegisterIntrinsic; + def _y : R600ReadPreloadRegisterIntrinsic; + def _z : R600ReadPreloadRegisterIntrinsic; +} + +defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_global_size">; +defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_local_size">; +defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_ngroups">; +defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_tgid">; +defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_tidig">; +} // End TargetPrefix = "r600" diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td similarity index 99% rename from include/llvm/IntrinsicsX86.td rename to include/llvm/IR/IntrinsicsX86.td index d2463c0efa14..69e0ab4fa2ed 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2550,7 +2550,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". } //===----------------------------------------------------------------------===// -// RDRAND intrinsics. Return a random value and whether it is valid. +// RDRAND intrinsics - Return a random value and whether it is valid. +// RDSEED intrinsics - Return a NIST SP800-90B & C compliant random value and +// whether it is valid. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // These are declared side-effecting so they don't get eliminated by CSE or @@ -2558,6 +2560,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_rdrand_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>; def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>; def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>; + def int_x86_rdseed_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>; + def int_x86_rdseed_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>; + def int_x86_rdseed_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>; } //===----------------------------------------------------------------------===// @@ -2570,4 +2575,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[], [], []>; def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">, Intrinsic<[], [llvm_i8_ty], [IntrNoReturn]>; + def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">, + Intrinsic<[llvm_i32_ty], [], []>; } diff --git a/include/llvm/IntrinsicsXCore.td b/include/llvm/IR/IntrinsicsXCore.td similarity index 100% rename from include/llvm/IntrinsicsXCore.td rename to include/llvm/IR/IntrinsicsXCore.td diff --git a/include/llvm/LLVMContext.h b/include/llvm/IR/LLVMContext.h similarity index 96% rename from include/llvm/LLVMContext.h rename to include/llvm/IR/LLVMContext.h index 5903e2e55e1f..ae81e5b1c3bc 100644 --- a/include/llvm/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LLVMCONTEXT_H -#define LLVM_LLVMCONTEXT_H +#ifndef LLVM_IR_LLVMCONTEXT_H +#define LLVM_IR_LLVMCONTEXT_H #include "llvm/Support/Compiler.h" @@ -46,7 +46,8 @@ public: MD_prof = 2, // "prof" MD_fpmath = 3, // "fpmath" MD_range = 4, // "range" - MD_tbaa_struct = 5 // "tbaa.struct" + MD_tbaa_struct = 5, // "tbaa.struct" + MD_invariant_load = 6 // "invariant.load" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. 
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h new file mode 100644 index 000000000000..a1e3fb1966ed --- /dev/null +++ b/include/llvm/IR/MDBuilder.h @@ -0,0 +1,186 @@ +//===---- llvm/MDBuilder.h - Builder for LLVM metadata ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MDBuilder class, which is used as a convenient way to +// create LLVM metadata with a consistent and simplified interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_MDBUILDER_H +#define LLVM_IR_MDBUILDER_H + +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Metadata.h" + +namespace llvm { + +class APInt; +class LLVMContext; + +class MDBuilder { + LLVMContext &Context; + +public: + MDBuilder(LLVMContext &context) : Context(context) {} + + /// \brief Return the given string as metadata. + MDString *createString(StringRef Str) { + return MDString::get(Context, Str); + } + + //===------------------------------------------------------------------===// + // FPMath metadata. + //===------------------------------------------------------------------===// + + /// \brief Return metadata with the given settings. The special value 0.0 + /// for the Accuracy parameter indicates the default (maximal precision) + /// setting. + MDNode *createFPMath(float Accuracy) { + if (Accuracy == 0.0) + return 0; + assert(Accuracy > 0.0 && "Invalid fpmath accuracy!"); + Value *Op = ConstantFP::get(Type::getFloatTy(Context), Accuracy); + return MDNode::get(Context, Op); + } + + //===------------------------------------------------------------------===// + // Prof metadata. 
+ //===------------------------------------------------------------------===// + + /// \brief Return metadata containing two branch weights. + MDNode *createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight) { + uint32_t Weights[] = { TrueWeight, FalseWeight }; + return createBranchWeights(Weights); + } + + /// \brief Return metadata containing a number of branch weights. + MDNode *createBranchWeights(ArrayRef Weights) { + assert(Weights.size() >= 2 && "Need at least two branch weights!"); + + SmallVector Vals(Weights.size()+1); + Vals[0] = createString("branch_weights"); + + Type *Int32Ty = Type::getInt32Ty(Context); + for (unsigned i = 0, e = Weights.size(); i != e; ++i) + Vals[i+1] = ConstantInt::get(Int32Ty, Weights[i]); + + return MDNode::get(Context, Vals); + } + + //===------------------------------------------------------------------===// + // Range metadata. + //===------------------------------------------------------------------===// + + /// \brief Return metadata describing the range [Lo, Hi). + MDNode *createRange(const APInt &Lo, const APInt &Hi) { + assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!"); + // If the range is everything then it is useless. + if (Hi == Lo) + return 0; + + // Return the range [Lo, Hi). + Type *Ty = IntegerType::get(Context, Lo.getBitWidth()); + Value *Range[2] = { ConstantInt::get(Ty, Lo), ConstantInt::get(Ty, Hi) }; + return MDNode::get(Context, Range); + } + + + //===------------------------------------------------------------------===// + // TBAA metadata. + //===------------------------------------------------------------------===// + + /// \brief Return metadata appropriate for a TBAA root node. Each returned + /// node is distinct from all other metadata and will never be identified + /// (uniqued) with anything else. + MDNode *createAnonymousTBAARoot() { + // To ensure uniqueness the root node is self-referential. 
+ MDNode *Dummy = MDNode::getTemporary(Context, ArrayRef()); + MDNode *Root = MDNode::get(Context, Dummy); + // At this point we have + // !0 = metadata !{} <- dummy + // !1 = metadata !{metadata !0} <- root + // Replace the dummy operand with the root node itself and delete the dummy. + Root->replaceOperandWith(0, Root); + MDNode::deleteTemporary(Dummy); + // We now have + // !1 = metadata !{metadata !1} <- self-referential root + return Root; + } + + /// \brief Return metadata appropriate for a TBAA root node with the given + /// name. This may be identified (uniqued) with other roots with the same + /// name. + MDNode *createTBAARoot(StringRef Name) { + return MDNode::get(Context, createString(Name)); + } + + /// \brief Return metadata for a non-root TBAA node with the given name, + /// parent in the TBAA tree, and value for 'pointsToConstantMemory'. + MDNode *createTBAANode(StringRef Name, MDNode *Parent, + bool isConstant = false) { + if (isConstant) { + Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1); + Value *Ops[3] = { createString(Name), Parent, Flags }; + return MDNode::get(Context, Ops); + } else { + Value *Ops[2] = { createString(Name), Parent }; + return MDNode::get(Context, Ops); + } + } + + struct TBAAStructField { + uint64_t Offset; + uint64_t Size; + MDNode *TBAA; + TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *TBAA) : + Offset(Offset), Size(Size), TBAA(TBAA) {} + }; + + /// \brief Return metadata for a tbaa.struct node with the given + /// struct field descriptions. 
+ MDNode *createTBAAStructNode(ArrayRef Fields) { + SmallVector Vals(Fields.size() * 3); + Type *Int64 = IntegerType::get(Context, 64); + for (unsigned i = 0, e = Fields.size(); i != e; ++i) { + Vals[i * 3 + 0] = ConstantInt::get(Int64, Fields[i].Offset); + Vals[i * 3 + 1] = ConstantInt::get(Int64, Fields[i].Size); + Vals[i * 3 + 2] = Fields[i].TBAA; + } + return MDNode::get(Context, Vals); + } + + /// \brief Return metadata for a TBAA struct node in the type DAG + /// with the given name, parents in the TBAA DAG. + MDNode *createTBAAStructTypeNode(StringRef Name, + ArrayRef > Fields) { + SmallVector Ops(Fields.size() * 2 + 1); + Type *Int64 = IntegerType::get(Context, 64); + Ops[0] = createString(Name); + for (unsigned i = 0, e = Fields.size(); i != e; ++i) { + Ops[i * 2 + 1] = ConstantInt::get(Int64, Fields[i].first); + Ops[i * 2 + 2] = Fields[i].second; + } + return MDNode::get(Context, Ops); + } + + /// \brief Return metadata for a TBAA tag node with the given + /// base type, access type and offset relative to the base type. 
+ MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, + uint64_t Offset) { + Type *Int64 = IntegerType::get(Context, 64); + Value *Ops[3] = { BaseType, AccessType, ConstantInt::get(Int64, Offset) }; + return MDNode::get(Context, Ops); + } + +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/Metadata.h b/include/llvm/IR/Metadata.h similarity index 97% rename from include/llvm/Metadata.h rename to include/llvm/IR/Metadata.h index 0fbbb959888b..8c2cfac235d2 100644 --- a/include/llvm/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -13,13 +13,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_METADATA_H -#define LLVM_METADATA_H +#ifndef LLVM_IR_METADATA_H +#define LLVM_IR_METADATA_H -#include "llvm/Value.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/IR/Value.h" namespace llvm { class Constant; @@ -29,8 +29,8 @@ class Module; template class SmallVectorImpl; template class SymbolTableListTraits; - - + + //===----------------------------------------------------------------------===// /// MDString - a single uniqued string. /// These are used to efficiently contain a byte sequence for metadata. @@ -51,7 +51,7 @@ public: unsigned getLength() const { return (unsigned)getName().size(); } typedef StringRef::iterator iterator; - + /// begin() - Pointer to the first byte of the string. iterator begin() const { return getName().begin(); } @@ -64,9 +64,9 @@ public: } }; - + class MDNodeOperand; - + //===----------------------------------------------------------------------===// /// MDNode - a tuple of other values. class MDNode : public Value, public FoldingSetNode { @@ -82,37 +82,37 @@ class MDNode : public Value, public FoldingSetNode { /// NumOperands - This many 'MDNodeOperand' items are co-allocated onto the /// end of this MDNode. unsigned NumOperands; - + // Subclass data enums. 
enum { /// FunctionLocalBit - This bit is set if this MDNode is function local. /// This is true when it (potentially transitively) contains a reference to /// something in a function, like an argument, basicblock, or instruction. FunctionLocalBit = 1 << 0, - + /// NotUniquedBit - This is set on MDNodes that are not uniqued because they /// have a null operand. NotUniquedBit = 1 << 1, - + /// DestroyFlag - This bit is set by destroy() so the destructor can assert /// that the node isn't being destroyed with a plain 'delete'. DestroyFlag = 1 << 2 }; - + // FunctionLocal enums. enum FunctionLocalness { FL_Unknown = -1, FL_No = 0, FL_Yes = 1 }; - - /// replaceOperand - Replace each instance of F from the operand list of this + + /// replaceOperand - Replace each instance of F from the operand list of this /// node with T. void replaceOperand(MDNodeOperand *Op, Value *NewVal); ~MDNode(); MDNode(LLVMContext &C, ArrayRef Vals, bool isFunctionLocal); - + static MDNode *getMDNode(LLVMContext &C, ArrayRef Vals, FunctionLocalness FL, bool Insert = true); public: @@ -123,7 +123,7 @@ public: static MDNode *getWhenValsUnresolved(LLVMContext &Context, ArrayRef Vals, bool isFunctionLocal); - + static MDNode *getIfExists(LLVMContext &Context, ArrayRef Vals); /// getTemporary - Return a temporary MDNode, for use in constructing @@ -137,22 +137,22 @@ public: /// replaceOperandWith - Replace a specific operand. void replaceOperandWith(unsigned i, Value *NewVal); - + /// getOperand - Return specified operand. Value *getOperand(unsigned i) const; - + /// getNumOperands - Return number of MDNode operands. unsigned getNumOperands() const { return NumOperands; } - + /// isFunctionLocal - Return whether MDNode is local to a function. bool isFunctionLocal() const { return (getSubclassDataFromValue() & FunctionLocalBit) != 0; } - + // getFunction - If this metadata is function-local and recursively has a // function-local operand, return the first such operand's parent function. 
// Otherwise, return null. getFunction() should not be used for performance- - // critical code because it recursively visits all the MDNode's operands. + // critical code because it recursively visits all the MDNode's operands. const Function *getFunction() const; /// Profile - calculate a unique identifier for this MDNode to collapse @@ -172,11 +172,11 @@ private: // destroy - Delete this node. Only when there are no uses. void destroy(); - bool isNotUniqued() const { + bool isNotUniqued() const { return (getSubclassDataFromValue() & NotUniquedBit) != 0; } void setIsNotUniqued(); - + // Shadow Value::setValueSubclassData with a private forwarding method so that // any future subclasses cannot accidentally use it. void setValueSubclassData(unsigned short D) { @@ -220,7 +220,7 @@ public: /// getOperand - Return specified operand. MDNode *getOperand(unsigned i) const; - + /// getNumOperands - Return the number of NamedMDNode operands. unsigned getNumOperands() const; diff --git a/include/llvm/Module.h b/include/llvm/IR/Module.h similarity index 89% rename from include/llvm/Module.h rename to include/llvm/IR/Module.h index e6303ac7752d..4460aa435b94 100644 --- a/include/llvm/Module.h +++ b/include/llvm/IR/Module.h @@ -12,16 +12,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MODULE_H -#define LLVM_MODULE_H +#ifndef LLVM_IR_MODULE_H +#define LLVM_IR_MODULE_H -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" -#include "llvm/Metadata.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/DataTypes.h" -#include namespace llvm { @@ -122,9 +121,6 @@ public: /// The type for the list of named metadata. typedef ilist NamedMDListType; - /// The type for the list of dependent libraries. 
- typedef std::vector LibraryListType; - /// The Global Variable iterator. typedef GlobalListType::iterator global_iterator; /// The Global Variable constant iterator. @@ -144,8 +140,6 @@ public: typedef NamedMDListType::iterator named_metadata_iterator; /// The named metadata constant interators. typedef NamedMDListType::const_iterator const_named_metadata_iterator; - /// The Library list iterator. - typedef LibraryListType::const_iterator lib_iterator; /// An enumeration for describing the endianess of the target machine. enum Endianness { AnyEndianness, LittleEndian, BigEndian }; @@ -153,30 +147,38 @@ public: /// An enumeration for describing the size of a pointer on the target machine. enum PointerSize { AnyPointerSize, Pointer32, Pointer64 }; - /// An enumeration for the supported behaviors of module flags. The following - /// module flags behavior values are supported: - /// - /// Value Behavior - /// ----- -------- - /// 1 Error - /// Emits an error if two values disagree. - /// - /// 2 Warning - /// Emits a warning if two values disagree. - /// - /// 3 Require - /// Emits an error when the specified value is not present - /// or doesn't have the specified value. It is an error for - /// two (or more) llvm.module.flags with the same ID to have - /// the Require behavior but different values. There may be - /// multiple Require flags per ID. - /// - /// 4 Override - /// Uses the specified value if the two values disagree. It - /// is an error for two (or more) llvm.module.flags with the - /// same ID to have the Override behavior but different - /// values. - enum ModFlagBehavior { Error = 1, Warning = 2, Require = 3, Override = 4 }; + /// This enumeration defines the supported behaviors of module flags. + enum ModFlagBehavior { + /// Emits an error if two values disagree, otherwise the resulting value is + /// that of the operands. + Error = 1, + + /// Emits a warning if two values disagree. 
The result value will be the + /// operand for the flag from the first module being linked. + Warning = 2, + + /// Adds a requirement that another module flag be present and have a + /// specified value after linking is performed. The value must be a metadata + /// pair, where the first element of the pair is the ID of the module flag + /// to be restricted, and the second element of the pair is the value the + /// module flag should be restricted to. This behavior can be used to + /// restrict the allowable results (via triggering of an error) of linking + /// IDs with the **Override** behavior. + Require = 3, + + /// Uses the specified value, regardless of the behavior or value of the + /// other module. If both modules specify **Override**, but the values + /// differ, an error will be emitted. + Override = 4, + + /// Appends the two values, which are required to be metadata nodes. + Append = 5, + + /// Appends the two values, which are required to be metadata + /// nodes. However, duplicate entries in the second list are dropped + /// during the append operation. + AppendUnique = 6 + }; struct ModuleFlagEntry { ModFlagBehavior Behavior; @@ -195,7 +197,6 @@ private: GlobalListType GlobalList; ///< The Global Variables in the module FunctionListType FunctionList; ///< The Functions in the module AliasListType AliasList; ///< The Aliases in the module - LibraryListType LibraryList; ///< The Libraries needed by the module NamedMDListType NamedMDList; ///< The named metadata in the module std::string GlobalScopeAsm; ///< Inline Asm at global scope. ValueSymbolTable *ValSymTab; ///< Symbol table for values @@ -319,7 +320,7 @@ public: /// 4. Finally, the function exists but has the wrong prototype: return the /// function with a constantexpr cast to the right prototype. 
Constant *getOrInsertFunction(StringRef Name, FunctionType *T, - AttrListPtr AttributeList); + AttributeSet AttributeList); Constant *getOrInsertFunction(StringRef Name, FunctionType *T); @@ -331,7 +332,7 @@ public: /// null terminated list of function arguments, which makes it easier for /// clients to use. Constant *getOrInsertFunction(StringRef Name, - AttrListPtr AttributeList, + AttributeSet AttributeList, Type *RetTy, ...) END_WITH_NULL; /// getOrInsertFunction - Same as above, but without the attributes. @@ -340,7 +341,7 @@ public: Constant *getOrInsertTargetIntrinsic(StringRef Name, FunctionType *Ty, - AttrListPtr AttributeList); + AttributeSet AttributeList); /// getFunction - Look up the specified function in the module symbol table. /// If it does not exist, return null. @@ -526,23 +527,6 @@ public: size_t size() const { return FunctionList.size(); } bool empty() const { return FunctionList.empty(); } -/// @} -/// @name Dependent Library Iteration -/// @{ - - /// @brief Get a constant iterator to beginning of dependent library list. - inline lib_iterator lib_begin() const { return LibraryList.begin(); } - /// @brief Get a constant iterator to end of dependent library list. - inline lib_iterator lib_end() const { return LibraryList.end(); } - /// @brief Returns the number of items in the list of libraries. 
- inline size_t lib_size() const { return LibraryList.size(); } - /// @brief Add a library to the list of dependent libraries - void addLibrary(StringRef Lib); - /// @brief Remove a library from the list of dependent libraries - void removeLibrary(StringRef Lib); - /// @brief Get all the libraries - inline const LibraryListType& getLibraries() const { return LibraryList; } - /// @} /// @name Alias Iteration /// @{ diff --git a/include/llvm/OperandTraits.h b/include/llvm/IR/OperandTraits.h similarity index 98% rename from include/llvm/OperandTraits.h rename to include/llvm/IR/OperandTraits.h index 3d8dc329b39f..0e4b1950f277 100644 --- a/include/llvm/OperandTraits.h +++ b/include/llvm/IR/OperandTraits.h @@ -12,10 +12,10 @@ // the operands in the most efficient manner. // -#ifndef LLVM_OPERAND_TRAITS_H -#define LLVM_OPERAND_TRAITS_H +#ifndef LLVM_IR_OPERANDTRAITS_H +#define LLVM_IR_OPERANDTRAITS_H -#include "llvm/User.h" +#include "llvm/IR/User.h" namespace llvm { diff --git a/include/llvm/Operator.h b/include/llvm/IR/Operator.h similarity index 62% rename from include/llvm/Operator.h rename to include/llvm/IR/Operator.h index b326c1135206..13ab72cfefc8 100644 --- a/include/llvm/Operator.h +++ b/include/llvm/IR/Operator.h @@ -12,13 +12,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_OPERATOR_H -#define LLVM_OPERATOR_H +#ifndef LLVM_IR_OPERATOR_H +#define LLVM_IR_OPERATOR_H -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instruction.h" -#include "llvm/Type.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" namespace llvm { @@ -31,8 +33,8 @@ class ConstantExpr; /// class Operator : public User { private: - // Do not implement any of these. 
The Operator class is intended to be used - // as a utility, and is never itself instantiated. + // The Operator class is intended to be used as a utility, and is never itself + // instantiated. void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION; void *operator new(size_t s) LLVM_DELETED_FUNCTION; Operator() LLVM_DELETED_FUNCTION; @@ -131,21 +133,21 @@ public: enum { IsExact = (1 << 0) }; - + private: friend class BinaryOperator; friend class ConstantExpr; void setIsExact(bool B) { SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact); } - + public: /// isExact - Test whether this division is known to be exact, with /// zero remainder. bool isExact() const { return SubclassOptionalData & IsExact; } - + static bool isPossiblyExactOpcode(unsigned OpC) { return OpC == Instruction::SDiv || OpC == Instruction::UDiv || @@ -164,10 +166,133 @@ public: } }; +/// Convenience struct for specifying and reasoning about fast-math flags. +class FastMathFlags { +private: + friend class FPMathOperator; + unsigned Flags; + FastMathFlags(unsigned F) : Flags(F) { } + +public: + enum { + UnsafeAlgebra = (1 << 0), + NoNaNs = (1 << 1), + NoInfs = (1 << 2), + NoSignedZeros = (1 << 3), + AllowReciprocal = (1 << 4) + }; + + FastMathFlags() : Flags(0) + { } + + /// Whether any flag is set + bool any() { return Flags != 0; } + + /// Set all the flags to false + void clear() { Flags = 0; } + + /// Flag queries + bool noNaNs() { return 0 != (Flags & NoNaNs); } + bool noInfs() { return 0 != (Flags & NoInfs); } + bool noSignedZeros() { return 0 != (Flags & NoSignedZeros); } + bool allowReciprocal() { return 0 != (Flags & AllowReciprocal); } + bool unsafeAlgebra() { return 0 != (Flags & UnsafeAlgebra); } + + /// Flag setters + void setNoNaNs() { Flags |= NoNaNs; } + void setNoInfs() { Flags |= NoInfs; } + void setNoSignedZeros() { Flags |= NoSignedZeros; } + void setAllowReciprocal() { Flags |= AllowReciprocal; } + void setUnsafeAlgebra() { + Flags |= UnsafeAlgebra; + 
setNoNaNs(); + setNoInfs(); + setNoSignedZeros(); + setAllowReciprocal(); + } +}; + + /// FPMathOperator - Utility class for floating point operations which can have /// information about relaxed accuracy requirements attached to them. class FPMathOperator : public Operator { +private: + friend class Instruction; + + void setHasUnsafeAlgebra(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~FastMathFlags::UnsafeAlgebra) | + (B * FastMathFlags::UnsafeAlgebra); + + // Unsafe algebra implies all the others + if (B) { + setHasNoNaNs(true); + setHasNoInfs(true); + setHasNoSignedZeros(true); + setHasAllowReciprocal(true); + } + } + void setHasNoNaNs(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~FastMathFlags::NoNaNs) | + (B * FastMathFlags::NoNaNs); + } + void setHasNoInfs(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~FastMathFlags::NoInfs) | + (B * FastMathFlags::NoInfs); + } + void setHasNoSignedZeros(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) | + (B * FastMathFlags::NoSignedZeros); + } + void setHasAllowReciprocal(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) | + (B * FastMathFlags::AllowReciprocal); + } + + /// Convenience function for setting all the fast-math flags + void setFastMathFlags(FastMathFlags FMF) { + SubclassOptionalData |= FMF.Flags; + } + public: + /// Test whether this operation is permitted to be + /// algebraically transformed, aka the 'A' fast-math property. + bool hasUnsafeAlgebra() const { + return (SubclassOptionalData & FastMathFlags::UnsafeAlgebra) != 0; + } + + /// Test whether this operation's arguments and results are to be + /// treated as non-NaN, aka the 'N' fast-math property. + bool hasNoNaNs() const { + return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0; + } + + /// Test whether this operation's arguments and results are to be + /// treated as NoN-Inf, aka the 'I' fast-math property. 
+ bool hasNoInfs() const { + return (SubclassOptionalData & FastMathFlags::NoInfs) != 0; + } + + /// Test whether this operation can treat the sign of zero + /// as insignificant, aka the 'S' fast-math property. + bool hasNoSignedZeros() const { + return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0; + } + + /// Test whether this operation is permitted to use + /// reciprocal instead of division, aka the 'R' fast-math property. + bool hasAllowReciprocal() const { + return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0; + } + + /// Convenience function for getting all the fast-math flags + FastMathFlags getFastMathFlags() const { + return FastMathFlags(SubclassOptionalData); + } /// \brief Get the maximum error permitted by this operation in ULPs. An /// accuracy of 0.0 means that the operation should be performed with the @@ -182,7 +307,7 @@ public: } }; - + /// ConcreteOperator - A helper template for defining operators for individual /// opcodes. template @@ -307,6 +432,45 @@ public: } return true; } + + /// \brief Accumulate the constant address offset of this GEP if possible. + /// + /// This routine accepts an APInt into which it will accumulate the constant + /// offset of this GEP if the GEP is in fact constant. If the GEP is not + /// all-constant, it returns false and the value of the offset APInt is + /// undefined (it is *not* preserved!). The APInt passed into this routine + /// must be at least as wide as the IntPtr type for the address space of + /// the base GEP pointer. 
+ bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const { + assert(Offset.getBitWidth() == + DL.getPointerSizeInBits(getPointerAddressSpace()) && + "The offset must have exactly as many bits as our pointer."); + + for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast(GTI.getOperand()); + if (!OpC) + return false; + if (OpC->isZero()) + continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = DL.getStructLayout(STy); + Offset += APInt(Offset.getBitWidth(), + SL->getElementOffset(ElementIdx)); + continue; + } + + // For array or vector indices, scale the index by the size of the type. + APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth()); + Offset += Index * APInt(Offset.getBitWidth(), + DL.getTypeAllocSize(GTI.getIndexedType())); + } + return true; + } + }; } // End llvm namespace diff --git a/include/llvm/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h similarity index 97% rename from include/llvm/SymbolTableListTraits.h rename to include/llvm/IR/SymbolTableListTraits.h index ec5c88f5c8a7..561ce010c0e0 100644 --- a/include/llvm/SymbolTableListTraits.h +++ b/include/llvm/IR/SymbolTableListTraits.h @@ -22,8 +22,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SYMBOLTABLELISTTRAITS_H -#define LLVM_SYMBOLTABLELISTTRAITS_H +#ifndef LLVM_IR_SYMBOLTABLELISTTRAITS_H +#define LLVM_IR_SYMBOLTABLELISTTRAITS_H #include "llvm/ADT/ilist.h" diff --git a/include/llvm/Type.h b/include/llvm/IR/Type.h similarity index 96% rename from include/llvm/Type.h rename to include/llvm/IR/Type.h index def45750dd71..d89ae243f5e7 100644 --- a/include/llvm/Type.h +++ b/include/llvm/IR/Type.h @@ -12,11 +12,13 @@ // 
//===----------------------------------------------------------------------===// -#ifndef LLVM_TYPE_H -#define LLVM_TYPE_H +#ifndef LLVM_IR_TYPE_H +#define LLVM_IR_TYPE_H +#include "llvm/ADT/APFloat.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { @@ -162,6 +164,18 @@ public: getTypeID() == PPC_FP128TyID; } + const fltSemantics &getFltSemantics() const { + switch (getTypeID()) { + case HalfTyID: return APFloat::IEEEhalf; + case FloatTyID: return APFloat::IEEEsingle; + case DoubleTyID: return APFloat::IEEEdouble; + case X86_FP80TyID: return APFloat::x87DoubleExtended; + case FP128TyID: return APFloat::IEEEquad; + case PPC_FP128TyID: return APFloat::PPCDoubleDouble; + default: llvm_unreachable("Invalid floating type"); + } + } + /// isX86_MMXTy - Return true if this is X86 MMX. bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; } diff --git a/include/llvm/TypeBuilder.h b/include/llvm/IR/TypeBuilder.h similarity index 99% rename from include/llvm/TypeBuilder.h rename to include/llvm/IR/TypeBuilder.h index 0b5647973184..80c60a080614 100644 --- a/include/llvm/TypeBuilder.h +++ b/include/llvm/IR/TypeBuilder.h @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TYPEBUILDER_H -#define LLVM_TYPEBUILDER_H +#ifndef LLVM_IR_TYPEBUILDER_H +#define LLVM_IR_TYPEBUILDER_H -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" #include namespace llvm { diff --git a/include/llvm/TypeFinder.h b/include/llvm/IR/TypeFinder.h similarity index 95% rename from include/llvm/TypeFinder.h rename to include/llvm/IR/TypeFinder.h index 5d807057a32d..cea66a4ab069 100644 --- a/include/llvm/TypeFinder.h +++ b/include/llvm/IR/TypeFinder.h @@ -1,4 +1,4 @@ -//===-- llvm/TypeFinder.h - Class for finding used struct types -*- C++ -*-===// +//===-- llvm/IR/TypeFinder.h - 
Class to find used struct types --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TYPEFINDER_H -#define LLVM_TYPEFINDER_H +#ifndef LLVM_IR_TYPEFINDER_H +#define LLVM_IR_TYPEFINDER_H #include "llvm/ADT/DenseSet.h" #include diff --git a/include/llvm/Use.h b/include/llvm/IR/Use.h similarity index 95% rename from include/llvm/Use.h rename to include/llvm/IR/Use.h index 80804459cc33..4bc7ce500058 100644 --- a/include/llvm/Use.h +++ b/include/llvm/IR/Use.h @@ -22,8 +22,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_USE_H -#define LLVM_USE_H +#ifndef LLVM_IR_USE_H +#define LLVM_IR_USE_H #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/Compiler.h" @@ -66,7 +66,6 @@ public: typedef PointerIntPair UserRef; private: - /// Copy ctor - do not implement Use(const Use &U) LLVM_DELETED_FUNCTION; /// Destructor - Only for zap() @@ -150,14 +149,14 @@ private: // casting operators. 
template<> struct simplify_type { typedef Value* SimpleType; - static SimpleType getSimplifiedValue(const Use &Val) { - return static_cast(Val.get()); + static SimpleType getSimplifiedValue(Use &Val) { + return Val.get(); } }; template<> struct simplify_type { - typedef Value* SimpleType; + typedef /*const*/ Value* SimpleType; static SimpleType getSimplifiedValue(const Use &Val) { - return static_cast(Val.get()); + return Val.get(); } }; diff --git a/include/llvm/User.h b/include/llvm/IR/User.h similarity index 92% rename from include/llvm/User.h rename to include/llvm/IR/User.h index df303d0dd5f2..505bdeb178e9 100644 --- a/include/llvm/User.h +++ b/include/llvm/IR/User.h @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_USER_H -#define LLVM_USER_H +#ifndef LLVM_IR_USER_H +#define LLVM_IR_USER_H +#include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Value.h" namespace llvm { @@ -183,27 +183,17 @@ public: template<> struct simplify_type { typedef Value* SimpleType; - - static SimpleType getSimplifiedValue(const User::op_iterator &Val) { - return static_cast(Val->get()); + static SimpleType getSimplifiedValue(User::op_iterator &Val) { + return Val->get(); } }; - -template<> struct simplify_type - : public simplify_type {}; - template<> struct simplify_type { - typedef Value* SimpleType; - - static SimpleType getSimplifiedValue(const User::const_op_iterator &Val) { - return static_cast(Val->get()); + typedef /*const*/ Value* SimpleType; + static SimpleType getSimplifiedValue(User::const_op_iterator &Val) { + return Val->get(); } }; -template<> struct simplify_type - : public simplify_type {}; - - // value_use_iterator::getOperandNo - Requires the definition of the User class. 
template unsigned value_use_iterator::getOperandNo() const { diff --git a/include/llvm/Value.h b/include/llvm/IR/Value.h similarity index 99% rename from include/llvm/Value.h rename to include/llvm/IR/Value.h index 5b19435ebaf4..a4f78627a84d 100644 --- a/include/llvm/Value.h +++ b/include/llvm/IR/Value.h @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_VALUE_H -#define LLVM_VALUE_H +#ifndef LLVM_IR_VALUE_H +#define LLVM_IR_VALUE_H -#include "llvm/Use.h" +#include "llvm/IR/Use.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h similarity index 97% rename from include/llvm/ValueSymbolTable.h rename to include/llvm/IR/ValueSymbolTable.h index 1738cc4a7a79..bf1fade1ccef 100644 --- a/include/llvm/ValueSymbolTable.h +++ b/include/llvm/IR/ValueSymbolTable.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_VALUE_SYMBOL_TABLE_H -#define LLVM_VALUE_SYMBOL_TABLE_H +#ifndef LLVM_IR_VALUESYMBOLTABLE_H +#define LLVM_IR_VALUESYMBOLTABLE_H -#include "llvm/Value.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/Value.h" #include "llvm/Support/DataTypes.h" namespace llvm { diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h new file mode 100644 index 000000000000..e2ae5f7164b2 --- /dev/null +++ b/include/llvm/IRReader/IRReader.h @@ -0,0 +1,55 @@ +//===---- llvm/IRReader/IRReader.h - Reader for LLVM IR files ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions for reading LLVM IR. They support both +// Bitcode and Assembly, automatically detecting the input format. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IRREADER_IRREADER_H +#define LLVM_IRREADER_IRREADER_H + +#include + +namespace llvm { + +class Module; +class MemoryBuffer; +class SMDiagnostic; +class LLVMContext; + +/// If the given MemoryBuffer holds a bitcode image, return a Module for it +/// which does lazy deserialization of function bodies. Otherwise, attempt to +/// parse it as LLVM Assembly and return a fully populated Module. This +/// function *always* takes ownership of the given MemoryBuffer. +Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, + LLVMContext &Context); + +/// If the given file holds a bitcode image, return a Module +/// for it which does lazy deserialization of function bodies. Otherwise, +/// attempt to parse it as LLVM Assembly and return a fully populated +/// Module. +Module *getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, + LLVMContext &Context); + +/// If the given MemoryBuffer holds a bitcode image, return a Module +/// for it. Otherwise, attempt to parse it as LLVM Assembly and return +/// a Module for it. This function *always* takes ownership of the given +/// MemoryBuffer. +Module *ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, LLVMContext &Context); + +/// If the given file holds a bitcode image, return a Module for it. +/// Otherwise, attempt to parse it as LLVM Assembly and return a Module +/// for it. +Module *ParseIRFile(const std::string &Filename, SMDiagnostic &Err, + LLVMContext &Context); + +} + +#endif diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 8c164eb91984..9cc194b4248a 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -31,6 +31,10 @@ void initializeTransformUtils(PassRegistry&); /// ScalarOpts library. void initializeScalarOpts(PassRegistry&); +/// initializeObjCARCOpts - Initialize all passes linked into the ObjCARCOpts +/// library. 
+void initializeObjCARCOpts(PassRegistry&); + /// initializeVectorization - Initialize all passes linked into the /// Vectorize library. void initializeVectorization(PassRegistry&); @@ -69,6 +73,7 @@ void initializeArgPromotionPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAliasAnalysisPass(PassRegistry&); void initializeBasicCallGraphPass(PassRegistry&); +void initializeBasicTTIPass(PassRegistry&); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoPass(PassRegistry&); void initializeBlockPlacementPass(PassRegistry&); @@ -76,6 +81,8 @@ void initializeBoundsCheckingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); +void initializeCallGraphPrinterPass(PassRegistry&); +void initializeCallGraphViewerPass(PassRegistry&); void initializeCFGOnlyPrinterPass(PassRegistry&); void initializeCFGOnlyViewerPass(PassRegistry&); void initializeCFGPrinterPass(PassRegistry&); @@ -84,7 +91,6 @@ void initializeCFGViewerPass(PassRegistry&); void initializeCalculateSpillWeightsPass(PassRegistry&); void initializeCallGraphAnalysisGroup(PassRegistry&); void initializeCodeGenPreparePass(PassRegistry&); -void initializeCodePlacementOptPass(PassRegistry&); void initializeConstantMergePass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); void initializeMachineCopyPropagationPass(PassRegistry&); @@ -110,12 +116,13 @@ void initializeExpandPostRAPass(PassRegistry&); void initializePathProfilerPass(PassRegistry&); void initializeGCOVProfilerPass(PassRegistry&); void initializeAddressSanitizerPass(PassRegistry&); +void initializeAddressSanitizerModulePass(PassRegistry&); +void initializeMemorySanitizerPass(PassRegistry&); void initializeThreadSanitizerPass(PassRegistry&); void initializeEarlyCSEPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); 
void initializeFindUsedTypesPass(PassRegistry&); void initializeFunctionAttrsPass(PassRegistry&); -void initializeGCInfoDeleterPass(PassRegistry&); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); @@ -127,6 +134,7 @@ void initializeIPSCCPPass(PassRegistry&); void initializeIVUsersPass(PassRegistry&); void initializeIfConverterPass(PassRegistry&); void initializeIndVarSimplifyPass(PassRegistry&); +void initializeInlineCostAnalysisPass(PassRegistry&); void initializeInstCombinerPass(PassRegistry&); void initializeInstCountPass(PassRegistry&); void initializeInstNamerPass(PassRegistry&); @@ -172,7 +180,6 @@ void initializeMachineDominatorTreePass(PassRegistry&); void initializeMachinePostDominatorTreePass(PassRegistry&); void initializeMachineLICMPass(PassRegistry&); void initializeMachineLoopInfoPass(PassRegistry&); -void initializeMachineLoopRangesPass(PassRegistry&); void initializeMachineModuleInfoPass(PassRegistry&); void initializeMachineSchedulerPass(PassRegistry&); void initializeMachineSinkingPass(PassRegistry&); @@ -205,9 +212,9 @@ void initializePostDomViewerPass(PassRegistry&); void initializePostDominatorTreePass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); void initializePreVerifierPass(PassRegistry&); -void initializePrintDbgInfoPass(PassRegistry&); void initializePrintFunctionPassPass(PassRegistry&); void initializePrintModulePassPass(PassRegistry&); +void initializePrintBasicBlockPassPass(PassRegistry&); void initializeProcessImplicitDefsPass(PassRegistry&); void initializeProfileEstimatorPassPass(PassRegistry&); void initializeProfileInfoAnalysisGroup(PassRegistry&); @@ -249,7 +256,8 @@ void initializeTailCallElimPass(PassRegistry&); void initializeTailDuplicatePassPass(PassRegistry&); void initializeTargetPassConfigPass(PassRegistry&); void initializeDataLayoutPass(PassRegistry&); -void 
initializeTargetTransformInfoPass(PassRegistry&); +void initializeTargetTransformInfoAnalysisGroup(PassRegistry&); +void initializeNoTTIPass(PassRegistry&); void initializeTargetLibraryInfoPass(PassRegistry&); void initializeTwoAddressInstructionPassPass(PassRegistry&); void initializeTypeBasedAliasAnalysisPass(PassRegistry&); diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/InstVisitor.h similarity index 97% rename from include/llvm/Support/InstVisitor.h rename to include/llvm/InstVisitor.h index 6dfb4dec0e23..291170334c0a 100644 --- a/include/llvm/Support/InstVisitor.h +++ b/include/llvm/InstVisitor.h @@ -1,4 +1,4 @@ -//===- llvm/Support/InstVisitor.h - Define instruction visitors -*- C++ -*-===// +//===- llvm/InstVisitor.h - Instruction visitor templates -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,14 +8,14 @@ //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_INSTVISITOR_H -#define LLVM_SUPPORT_INSTVISITOR_H +#ifndef LLVM_INSTVISITOR_H +#define LLVM_INSTVISITOR_H -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" @@ -25,7 +25,7 @@ namespace llvm { // types now... 
// #define HANDLE_INST(NUM, OPCODE, CLASS) class CLASS; -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" #define DELEGATE(CLASS_TO_VISIT) \ return static_cast(this)-> \ @@ -123,7 +123,7 @@ public: case Instruction::OPCODE: return \ static_cast(this)-> \ visit##OPCODE(static_cast(I)); -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" } } @@ -158,7 +158,7 @@ public: else \ DELEGATE(CLASS); \ } -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" // Specific Instruction type classes... note that all of the casts are // necessary because we use the instruction classes as opaque types... diff --git a/include/llvm/IntrinsicsCellSPU.td b/include/llvm/IntrinsicsCellSPU.td deleted file mode 100644 index 1e311bbecbc6..000000000000 --- a/include/llvm/IntrinsicsCellSPU.td +++ /dev/null @@ -1,242 +0,0 @@ -//==- IntrinsicsCellSPU.td - Cell SDK intrinsics -*- tablegen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// Department at The Aerospace Corporation and is distributed under the -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cell SPU Instructions: -//===----------------------------------------------------------------------===// -// TODO Items (not urgent today, but would be nice, low priority) -// -// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by -// concatenating the byte argument b as "bbbb". Could recognize this bit pattern -// in 16-bit and 32-bit constants and reduce instruction count. 
-//===----------------------------------------------------------------------===// - -// 7-bit integer type, used as an immediate: -def cell_i7_ty: LLVMType; -def cell_i8_ty: LLVMType; - -// Keep this here until it's actually supported: -def llvm_i128_ty : LLVMType; - -class v16i8_u7imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, cell_i7_ty], - [IntrNoMem]>; - -class v16i8_u8imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - -class v16i8_s10imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v16i8_u16imm : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v16i8_rr : - GCCBuiltin, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem]>; - -class v8i16_s10imm : - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v8i16_u16imm : - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v8i16_rr : - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - -class v4i32_rr : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; - -class v4i32_u7imm : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, cell_i7_ty], - [IntrNoMem]>; - -class v4i32_s10imm : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v4i32_u16imm : - GCCBuiltin, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty], - [IntrNoMem]>; - -class v4f32_rr : - GCCBuiltin, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - -class v4f32_rrr : - GCCBuiltin, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - -class v2f64_rr : - GCCBuiltin, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; - -// All Cell SPU intrinsics start with 
"llvm.spu.". -let TargetPrefix = "spu" in { - def int_spu_si_fsmbi : v8i16_u16imm<"fsmbi">; - def int_spu_si_ah : v8i16_rr<"ah">; - def int_spu_si_ahi : v8i16_s10imm<"ahi">; - def int_spu_si_a : v4i32_rr<"a">; - def int_spu_si_ai : v4i32_s10imm<"ai">; - def int_spu_si_sfh : v8i16_rr<"sfh">; - def int_spu_si_sfhi : v8i16_s10imm<"sfhi">; - def int_spu_si_sf : v4i32_rr<"sf">; - def int_spu_si_sfi : v4i32_s10imm<"sfi">; - def int_spu_si_addx : v4i32_rr<"addx">; - def int_spu_si_cg : v4i32_rr<"cg">; - def int_spu_si_cgx : v4i32_rr<"cgx">; - def int_spu_si_sfx : v4i32_rr<"sfx">; - def int_spu_si_bg : v4i32_rr<"bg">; - def int_spu_si_bgx : v4i32_rr<"bgx">; - def int_spu_si_mpy : // This is special: - GCCBuiltin<"__builtin_si_mpy">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyu : // This is special: - GCCBuiltin<"__builtin_si_mpyu">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyi : // This is special: - GCCBuiltin<"__builtin_si_mpyi">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyui : // This is special: - GCCBuiltin<"__builtin_si_mpyui">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_spu_si_mpya : // This is special: - GCCBuiltin<"__builtin_si_mpya">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyh : // This is special: - GCCBuiltin<"__builtin_si_mpyh">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpys : // This is special: - GCCBuiltin<"__builtin_si_mpys">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhh : // This is special: - GCCBuiltin<"__builtin_si_mpyhh">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhha : // This is special: - 
GCCBuiltin<"__builtin_si_mpyhha">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhhu : // This is special: - GCCBuiltin<"__builtin_si_mpyhhu">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - def int_spu_si_mpyhhau : // This is special: - GCCBuiltin<"__builtin_si_mpyhhau">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; - - def int_spu_si_shli: v4i32_u7imm<"shli">; - - def int_spu_si_shlqbi: - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], - [IntrNoMem]>; - - def int_spu_si_shlqbii: v16i8_u7imm<"shlqbii">; - def int_spu_si_shlqby: - GCCBuiltin, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_spu_si_shlqbyi: v16i8_u7imm<"shlqbyi">; - - def int_spu_si_ceq: v4i32_rr<"ceq">; - def int_spu_si_ceqi: v4i32_s10imm<"ceqi">; - def int_spu_si_ceqb: v16i8_rr<"ceqb">; - def int_spu_si_ceqbi: v16i8_u8imm<"ceqbi">; - def int_spu_si_ceqh: v8i16_rr<"ceqh">; - def int_spu_si_ceqhi: v8i16_s10imm<"ceqhi">; - def int_spu_si_cgt: v4i32_rr<"cgt">; - def int_spu_si_cgti: v4i32_s10imm<"cgti">; - def int_spu_si_cgtb: v16i8_rr<"cgtb">; - def int_spu_si_cgtbi: v16i8_u8imm<"cgtbi">; - def int_spu_si_cgth: v8i16_rr<"cgth">; - def int_spu_si_cgthi: v8i16_s10imm<"cgthi">; - def int_spu_si_clgtb: v16i8_rr<"clgtb">; - def int_spu_si_clgtbi: v16i8_u8imm<"clgtbi">; - def int_spu_si_clgth: v8i16_rr<"clgth">; - def int_spu_si_clgthi: v8i16_s10imm<"clgthi">; - def int_spu_si_clgt: v4i32_rr<"clgt">; - def int_spu_si_clgti: v4i32_s10imm<"clgti">; - - def int_spu_si_and: v4i32_rr<"and">; - def int_spu_si_andbi: v16i8_u8imm<"andbi">; - def int_spu_si_andc: v4i32_rr<"andc">; - def int_spu_si_andhi: v8i16_s10imm<"andhi">; - def int_spu_si_andi: v4i32_s10imm<"andi">; - - def int_spu_si_or: v4i32_rr<"or">; - def int_spu_si_orbi: v16i8_u8imm<"orbi">; - def int_spu_si_orc: v4i32_rr<"orc">; - def int_spu_si_orhi: v8i16_s10imm<"orhi">; - def 
int_spu_si_ori: v4i32_s10imm<"ori">; - - def int_spu_si_xor: v4i32_rr<"xor">; - def int_spu_si_xorbi: v16i8_u8imm<"xorbi">; - def int_spu_si_xorhi: v8i16_s10imm<"xorhi">; - def int_spu_si_xori: v4i32_s10imm<"xori">; - - def int_spu_si_nor: v4i32_rr<"nor">; - def int_spu_si_nand: v4i32_rr<"nand">; - - def int_spu_si_fa: v4f32_rr<"fa">; - def int_spu_si_fs: v4f32_rr<"fs">; - def int_spu_si_fm: v4f32_rr<"fm">; - - def int_spu_si_fceq: v4f32_rr<"fceq">; - def int_spu_si_fcmeq: v4f32_rr<"fcmeq">; - def int_spu_si_fcgt: v4f32_rr<"fcgt">; - def int_spu_si_fcmgt: v4f32_rr<"fcmgt">; - - def int_spu_si_fma: v4f32_rrr<"fma">; - def int_spu_si_fnms: v4f32_rrr<"fnms">; - def int_spu_si_fms: v4f32_rrr<"fms">; - - def int_spu_si_dfa: v2f64_rr<"dfa">; - def int_spu_si_dfs: v2f64_rr<"dfs">; - def int_spu_si_dfm: v2f64_rr<"dfm">; - -//def int_spu_si_dfceq: v2f64_rr<"dfceq">; -//def int_spu_si_dfcmeq: v2f64_rr<"dfcmeq">; -//def int_spu_si_dfcgt: v2f64_rr<"dfcgt">; -//def int_spu_si_dfcmgt: v2f64_rr<"dfcmgt">; - - def int_spu_si_dfnma: v2f64_rr<"dfnma">; - def int_spu_si_dfma: v2f64_rr<"dfma">; - def int_spu_si_dfnms: v2f64_rr<"dfnms">; - def int_spu_si_dfms: v2f64_rr<"dfms">; -} diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllIR.h similarity index 85% rename from include/llvm/LinkAllVMCore.h rename to include/llvm/LinkAllIR.h index 83684c0fb65d..4c1aaca7a385 100644 --- a/include/llvm/LinkAllVMCore.h +++ b/include/llvm/LinkAllIR.h @@ -1,4 +1,4 @@ -//===- LinkAllVMCore.h - Reference All VMCore Code --------------*- C++ -*-===// +//===----- LinkAllIR.h - Reference All VMCore Code --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -13,16 +13,18 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LINKALLVMCORE_H -#define LLVM_LINKALLVMCORE_H +#ifndef LLVM_LINKALLIR_H +#define LLVM_LINKALLIR_H -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include 
"llvm/IntrinsicInst.h" -#include "llvm/InlineAsm.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Path.h" @@ -30,8 +32,6 @@ #include "llvm/Support/Program.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TimeValue.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/MathExtras.h" #include namespace { diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 806e4b37b73d..1f017e471de5 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -16,23 +16,25 @@ #define LLVM_LINKALLPASSES_H #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/FindUsedTypes.h" #include "llvm/Analysis/IntervalPartition.h" +#include "llvm/Analysis/Lint.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/RegionPass.h" #include "llvm/Analysis/RegionPrinter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/Lint.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" -#include "llvm/Transforms/Instrumentation.h" +#include "llvm/IR/Function.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Vectorize.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Transforms/Vectorize.h" #include namespace { @@ -57,6 +59,8 @@ namespace { (void) llvm::createBlockPlacementPass(); (void) llvm::createBoundsCheckingPass(); (void) 
llvm::createBreakCriticalEdgesPass(); + (void) llvm::createCallGraphPrinterPass(); + (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); @@ -147,7 +151,7 @@ namespace { (void) llvm::createMergeFunctionsPass(); (void) llvm::createPrintModulePass(0); (void) llvm::createPrintFunctionPass("", 0); - (void) llvm::createDbgInfoPrinterPass(); + (void) llvm::createPrintBasicBlockPass(0); (void) llvm::createModuleDebugInfoPrinterPass(); (void) llvm::createPartialInliningPass(); (void) llvm::createLintPass(); diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h index 1ebcd6b53863..679638427d67 100644 --- a/include/llvm/Linker.h +++ b/include/llvm/Linker.h @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file defines the interface to the module/file/archive linker. -// -//===----------------------------------------------------------------------===// #ifndef LLVM_LINKER_H #define LLVM_LINKER_H @@ -19,7 +15,6 @@ #include namespace llvm { - namespace sys { class Path; } class Module; class LLVMContext; @@ -31,26 +26,17 @@ class StringRef; /// In this case the Linker still retains ownership of the Module. If the /// releaseModule() method is used, the ownership of the Module is transferred /// to the caller and the Linker object is only suitable for destruction. -/// The Linker can link Modules from memory, bitcode files, or bitcode -/// archives. It retains a set of search paths in which to find any libraries -/// presented to it. By default, the linker will generate error and warning -/// messages to stderr but this capability can be turned off with the -/// QuietWarnings and QuietErrors flags. It can also be instructed to verbosely -/// print out the linking actions it is taking with the Verbose flag. +/// The Linker can link Modules from memory. 
By default, the linker +/// will generate error and warning messages to stderr but this capability can +/// be turned off with the QuietWarnings and QuietErrors flags. It can also be +/// instructed to verbosely print out the linking actions it is taking with +/// the Verbose flag. /// @brief The LLVM Linker. class Linker { /// @name Types /// @{ public: - /// This type is used to pass the linkage items (libraries and files) to - /// the LinkItems function. It is composed of string/bool pairs. The string - /// provides the name of the file or library (as with the -l option). The - /// bool should be true for libraries and false for files, signifying - /// "isLibrary". - /// @brief A list of linkage items - typedef std::vector > ItemList; - /// This enumeration is used to control various optional features of the /// linker. enum ControlFlags { @@ -58,12 +44,12 @@ class Linker { QuietWarnings = 2, ///< Don't print warnings to stderr. QuietErrors = 4 ///< Don't print errors to stderr. }; - + enum LinkerMode { DestroySource = 0, // Allow source module to be destroyed. PreserveSource = 1 // Preserve the source module. }; - + /// @} /// @name Constructors /// @{ @@ -104,16 +90,10 @@ class Linker { /// must arrange for its destruct. After this method is called, the Linker /// terminates the linking session for the returned Module. It will no /// longer utilize the returned Module but instead resets itself for - /// subsequent linking as if the constructor had been called. The Linker's - /// LibPaths and flags to be reset, and memory will be released. + /// subsequent linking as if the constructor had been called. /// @brief Release the linked/composite module. Module* releaseModule(); - /// This method gets the list of libraries that form the path that the - /// Linker will search when it is presented with a library name. 
- /// @brief Get the Linkers library path - const std::vector& getLibPaths() const { return LibPaths; } - /// This method returns an error string suitable for printing to the user. /// The return value will be empty unless an error occurred in one of the /// LinkIn* methods. In those cases, the LinkIn* methods will have returned @@ -128,130 +108,16 @@ class Linker { /// @name Mutators /// @{ public: - /// Add a path to the list of paths that the Linker will search. The Linker - /// accumulates the set of libraries added - /// library paths for the target platform. The standard libraries will - /// always be searched last. The added libraries will be searched in the - /// order added. - /// @brief Add a path. - void addPath(const sys::Path& path); - - /// Add a set of paths to the list of paths that the linker will search. The - /// Linker accumulates the set of libraries added. The \p paths will be - /// added to the end of the Linker's list. Order will be retained. - /// @brief Add a set of paths. - void addPaths(const std::vector& paths); - - /// This method augments the Linker's list of library paths with the system - /// paths of the host operating system, include LLVM_LIB_SEARCH_PATH. - /// @brief Add the system paths. - void addSystemPaths(); - - /// Control optional linker behavior by setting a group of flags. The flags - /// are defined in the ControlFlags enumeration. - /// @see ControlFlags - /// @brief Set control flags. - void setFlags(unsigned flags) { Flags = flags; } - - /// This method is the main interface to the linker. It can be used to - /// link a set of linkage items into a module. A linkage item is either a - /// file name with fully qualified path, or a library for which the Linker's - /// LibraryPath will be utilized to locate the library. The bool value in - /// the LinkItemKind should be set to true for libraries. 
This function - /// allows linking to preserve the order of specification associated with - /// the command line, or for other purposes. Each item will be linked in - /// turn as it occurs in \p Items. - /// @returns true if an error occurred, false otherwise - /// @see LinkItemKind - /// @see getLastError - bool LinkInItems ( - const ItemList& Items, ///< Set of libraries/files to link in - ItemList& NativeItems ///< Output list of native files/libs - ); - - /// This function links the bitcode \p Files into the composite module. - /// Note that this does not do any linking of unresolved symbols. The \p - /// Files are all completely linked into \p HeadModule regardless of - /// unresolved symbols. This function just loads each bitcode file and - /// calls LinkInModule on them. - /// @returns true if an error occurs, false otherwise - /// @see getLastError - /// @brief Link in multiple files. - bool LinkInFiles ( - const std::vector & Files ///< Files to link in - ); - - /// This function links a single bitcode file, \p File, into the composite - /// module. Note that this does not attempt to resolve symbols. This method - /// just loads the bitcode file and calls LinkInModule on it. If an error - /// occurs, the Linker's error string is set. - /// @returns true if an error occurs, false otherwise - /// @see getLastError - /// @brief Link in a single file. - bool LinkInFile( - const sys::Path& File, ///< File to link in. - bool &is_native ///< Indicates if the file is native object file - ); - - /// This function provides a way to selectively link in a set of modules, - /// found in libraries, based on the unresolved symbols in the composite - /// module. Each item in \p Libraries should be the base name of a library, - /// as if given with the -l option of a linker tool. The Linker's LibPaths - /// are searched for the \p Libraries and any found will be linked in with - /// LinkInArchive. If an error occurs, the Linker's error string is set. 
- /// @see LinkInArchive - /// @see getLastError - /// @returns true if an error occurs, false otherwise - /// @brief Link libraries into the module - bool LinkInLibraries ( - const std::vector & Libraries ///< Libraries to link in - ); - - /// This function provides a way to selectively link in a set of modules, - /// found in one library, based on the unresolved symbols in the composite - /// module.The \p Library should be the base name of a library, as if given - /// with the -l option of a linker tool. The Linker's LibPaths are searched - /// for the \p Library and if found, it will be linked in with via the - /// LinkInArchive method. If an error occurs, the Linker's error string is - /// set. - /// @see LinkInArchive - /// @see getLastError - /// @returns true if an error occurs, false otherwise - /// @brief Link one library into the module - bool LinkInLibrary ( - StringRef Library, ///< The library to link in - bool& is_native ///< Indicates if lib a native library - ); - - /// This function links one bitcode archive, \p Filename, into the module. - /// The archive is searched to resolve outstanding symbols. Any modules in - /// the archive that resolve outstanding symbols will be linked in. The - /// library is searched repeatedly until no more modules that resolve - /// symbols can be found. If an error occurs, the error string is set. - /// To speed up this function, ensure the archive has been processed - /// llvm-ranlib or the S option was given to llvm-ar when the archive was - /// created. These tools add a symbol table to the archive which makes the - /// search for undefined symbols much faster. - /// @see getLastError - /// @returns true if an error occurs, otherwise false. - /// @brief Link in one archive. 
- bool LinkInArchive( - const sys::Path& Filename, ///< Filename of the archive to link - bool& is_native ///< Indicates if archive is a native archive - ); - /// This method links the \p Src module into the Linker's Composite module - /// by calling LinkModules. All the other LinkIn* methods eventually - /// result in calling this method to link a Module into the Linker's - /// composite. + /// by calling LinkModules. /// @see LinkModules /// @returns True if an error occurs, false otherwise. /// @brief Link in a module. bool LinkInModule( Module* Src, ///< Module linked into \p Dest std::string* ErrorMsg = 0 /// Error/diagnostic string - ) { - return LinkModules(Composite, Src, Linker::DestroySource, ErrorMsg ); + ) { + return LinkModules(Composite, Src, Linker::DestroySource, ErrorMsg); } /// This is the heart of the linker. This method will take unconditional @@ -268,21 +134,10 @@ class Linker { static bool LinkModules(Module* Dest, Module* Src, unsigned Mode, std::string* ErrorMsg); - /// This function looks through the Linker's LibPaths to find a library with - /// the name \p Filename. If the library cannot be found, the returned path - /// will be empty (i.e. sys::Path::isEmpty() will return true). - /// @returns A sys::Path to the found library - /// @brief Find a library from its short name. - sys::Path FindLib(StringRef Filename); - /// @} /// @name Implementation /// @{ private: - /// Read in and parse the bitcode file named by FN and return the - /// Module it contains (wrapped in an auto_ptr), or 0 if an error occurs. - std::auto_ptr LoadObject(const sys::Path& FN); - bool warning(StringRef message); bool error(StringRef message); void verbose(StringRef message); @@ -293,7 +148,6 @@ class Linker { private: LLVMContext& Context; ///< The context for global information Module* Composite; ///< The composite module linked together - std::vector LibPaths; ///< The library search paths unsigned Flags; ///< Flags to control optional behavior. 
std::string Error; ///< Text of error that occurred. std::string ProgramName; ///< Name of the program being linked diff --git a/include/llvm/MC/EDInstInfo.h b/include/llvm/MC/EDInstInfo.h deleted file mode 100644 index 5b024675cdc8..000000000000 --- a/include/llvm/MC/EDInstInfo.h +++ /dev/null @@ -1,29 +0,0 @@ -//===-- llvm/MC/EDInstInfo.h - EDis instruction info ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -#ifndef EDINSTINFO_H -#define EDINSTINFO_H - -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -#define EDIS_MAX_OPERANDS 13 -#define EDIS_MAX_SYNTAXES 2 - -struct EDInstInfo { - uint8_t instructionType; - uint8_t numOperands; - uint8_t operandTypes[EDIS_MAX_OPERANDS]; - uint8_t operandFlags[EDIS_MAX_OPERANDS]; - const signed char operandOrders[EDIS_MAX_SYNTAXES][EDIS_MAX_OPERANDS]; -}; - -} // namespace llvm - -#endif diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h index 72ed1a317c55..9a6b70340808 100644 --- a/include/llvm/MC/MCAsmBackend.h +++ b/include/llvm/MC/MCAsmBackend.h @@ -22,7 +22,7 @@ class MCELFObjectTargetWriter; struct MCFixupKindInfo; class MCFragment; class MCInst; -class MCInstFragment; +class MCRelaxableFragment; class MCObjectWriter; class MCSection; class MCValue; @@ -41,6 +41,9 @@ protected: // Can only create subclasses. public: virtual ~MCAsmBackend(); + /// lifetime management + virtual void reset() { } + /// createObjectWriter - Create a new MCObjectWriter instance for use by the /// assembler backend to emit the final object file. virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0; @@ -127,7 +130,7 @@ public: /// fixup requires the associated instruction to be relaxed. 
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const = 0; /// RelaxInstruction - Relax the instruction in the given fragment to the next diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 97aad71fd955..28256b3677ef 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -13,11 +13,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ASM_INFO_H -#define LLVM_TARGET_ASM_INFO_H +#ifndef LLVM_MC_MCASMINFO_H +#define LLVM_MC_MCASMINFO_H -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MachineLocation.h" #include #include @@ -48,6 +48,11 @@ namespace llvm { /// Default is 4. unsigned PointerSize; + /// CalleeSaveStackSlotSize - Size of the stack slot reserved for + /// callee-saved registers, in bytes. + /// Default is same as pointer size. + unsigned CalleeSaveStackSlotSize; + /// IsLittleEndian - True if target is little endian. /// Default is true. bool IsLittleEndian; @@ -102,6 +107,9 @@ namespace llvm { /// LabelSuffix - This is appended to emitted labels. const char *LabelSuffix; // Defaults to ":" + /// LabelSuffix - This is appended to emitted labels. + const char *DebugLabelSuffix; // Defaults to ":" + /// GlobalPrefix - If this is set to a non-empty string, it is prepended /// onto all global symbols. This is often used for "_" or ".". const char *GlobalPrefix; // Defaults to "" @@ -340,7 +348,13 @@ namespace llvm { return PointerSize; } - /// islittleendian - True if the target is little endian. + /// getCalleeSaveStackSlotSize - Get the callee-saved register stack slot + /// size in bytes. + unsigned getCalleeSaveStackSlotSize() const { + return CalleeSaveStackSlotSize; + } + + /// isLittleEndian - True if the target is little endian. 
bool isLittleEndian() const { return IsLittleEndian; } @@ -426,6 +440,11 @@ namespace llvm { const char *getLabelSuffix() const { return LabelSuffix; } + + const char *getDebugLabelSuffix() const { + return DebugLabelSuffix; + } + const char *getGlobalPrefix() const { return GlobalPrefix; } diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h index 0ff3e127ed0e..7286151760c0 100644 --- a/include/llvm/MC/MCAsmInfoCOFF.h +++ b/include/llvm/MC/MCAsmInfoCOFF.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_COFF_TARGET_ASM_INFO_H -#define LLVM_COFF_TARGET_ASM_INFO_H +#ifndef LLVM_MC_MCASMINFOCOFF_H +#define LLVM_MC_MCASMINFOCOFF_H #include "llvm/MC/MCAsmInfo.h" @@ -33,4 +33,4 @@ namespace llvm { } -#endif // LLVM_COFF_TARGET_ASM_INFO_H +#endif // LLVM_MC_MCASMINFOCOFF_H diff --git a/include/llvm/MC/MCAsmInfoDarwin.h b/include/llvm/MC/MCAsmInfoDarwin.h index af552de6e690..3d249f93068d 100644 --- a/include/llvm/MC/MCAsmInfoDarwin.h +++ b/include/llvm/MC/MCAsmInfoDarwin.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DARWIN_TARGET_ASM_INFO_H -#define LLVM_DARWIN_TARGET_ASM_INFO_H +#ifndef LLVM_MC_MCASMINFODARWIN_H +#define LLVM_MC_MCASMINFODARWIN_H #include "llvm/MC/MCAsmInfo.h" @@ -26,4 +26,4 @@ namespace llvm { } -#endif // LLVM_DARWIN_TARGET_ASM_INFO_H +#endif // LLVM_MC_MCASMINFODARWIN_H diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h index cf79216d076a..3058b7b48742 100644 --- a/include/llvm/MC/MCAsmLayout.h +++ b/include/llvm/MC/MCAsmLayout.h @@ -21,10 +21,10 @@ class MCSymbolData; /// Encapsulates the layout of an assembly file at a particular point in time. /// -/// Assembly may requiring compute multiple layouts for a particular assembly +/// Assembly may require computing multiple layouts for a particular assembly /// file as part of the relaxation process. 
This class encapsulates the layout /// at a single point in time in such a way that it is always possible to -/// efficiently compute the exact addresses of any symbol in the assembly file, +/// efficiently compute the exact address of any symbol in the assembly file, /// even during the relaxation process. class MCAsmLayout { public: @@ -39,14 +39,20 @@ private: /// The last fragment which was laid out, or 0 if nothing has been laid /// out. Fragments are always laid out in order, so all fragments with a - /// lower ordinal will be up to date. - mutable DenseMap LastValidFragment; + /// lower ordinal will be valid. + mutable DenseMap LastValidFragment; /// \brief Make sure that the layout for the given fragment is valid, lazily /// computing it if necessary. - void EnsureValid(const MCFragment *F) const; + void ensureValid(const MCFragment *F) const; - bool isFragmentUpToDate(const MCFragment *F) const; + /// \brief Is the layout for this fragment valid? + bool isFragmentValid(const MCFragment *F) const; + + /// \brief Compute the amount of padding required before this fragment to + /// obey bundling restrictions. + uint64_t computeBundlePadding(const MCFragment *F, + uint64_t FOffset, uint64_t FSize); public: MCAsmLayout(MCAssembler &_Assembler); @@ -54,14 +60,15 @@ public: /// Get the assembler object this is a layout for. MCAssembler &getAssembler() const { return Assembler; } - /// \brief Invalidate all following fragments because a fragment has been - /// resized. The fragments size should have already been updated. - void Invalidate(MCFragment *F); + /// \brief Invalidate the fragments starting with F because it has been + /// resized. The fragment's size should have already been updated, but + /// its bundle padding will be recomputed. + void invalidateFragmentsFrom(MCFragment *F); /// \brief Perform layout for a single fragment, assuming that the previous /// fragment has already been laid out correctly, and the parent section has /// been initialized. 
- void LayoutFragment(MCFragment *Fragment); + void layoutFragment(MCFragment *Fragment); /// @name Section Access (in layout order) /// @{ diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 5771415c81cc..43fbdc9301ac 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -10,13 +10,13 @@ #ifndef LLVM_MC_MCASSEMBLER_H #define LLVM_MC_MCASSEMBLER_H -#include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCInst.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataTypes.h" #include // FIXME: Shouldn't be needed. @@ -47,8 +47,9 @@ public: enum FragmentType { FT_Align, FT_Data, + FT_CompactEncodedInst, FT_Fill, - FT_Inst, + FT_Relaxable, FT_Org, FT_Dwarf, FT_DwarfFrame, @@ -99,42 +100,139 @@ public: unsigned getLayoutOrder() const { return LayoutOrder; } void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } + /// \brief Does this fragment have instructions emitted into it? By default + /// this is false, but specific fragment types may set it to true. + virtual bool hasInstructions() const { return false; } + + /// \brief Should this fragment be placed at the end of an aligned bundle? + virtual bool alignToBundleEnd() const { return false; } + virtual void setAlignToBundleEnd(bool V) { } + + /// \brief Get the padding size that must be inserted before this fragment. + /// Used for bundling. By default, no padding is inserted. + /// Note that padding size is restricted to 8 bits. This is an optimization + /// to reduce the amount of space used for each fragment. In practice, larger + /// padding should never be required. + virtual uint8_t getBundlePadding() const { + return 0; + } + + /// \brief Set the padding size for this fragment. 
By default it's a no-op, + /// and only some fragments have a meaningful implementation. + virtual void setBundlePadding(uint8_t N) { + } + void dump(); }; -class MCDataFragment : public MCFragment { +/// Interface implemented by fragments that contain encoded instructions and/or +/// data. +/// +class MCEncodedFragment : public MCFragment { virtual void anchor(); - SmallString<32> Contents; - - /// Fixups - The list of fixups in this fragment. - std::vector Fixups; + uint8_t BundlePadding; public: - typedef std::vector::const_iterator const_fixup_iterator; - typedef std::vector::iterator fixup_iterator; + MCEncodedFragment(MCFragment::FragmentType FType, MCSectionData *SD = 0) + : MCFragment(FType, SD), BundlePadding(0) + { + } + virtual ~MCEncodedFragment(); -public: - MCDataFragment(MCSectionData *SD = 0) : MCFragment(FT_Data, SD) {} + virtual SmallVectorImpl &getContents() = 0; + virtual const SmallVectorImpl &getContents() const = 0; - /// @name Accessors - /// @{ - - SmallString<32> &getContents() { return Contents; } - const SmallString<32> &getContents() const { return Contents; } - - /// @} - /// @name Fixup Access - /// @{ - - void addFixup(MCFixup Fixup) { - // Enforce invariant that fixups are in offset order. 
- assert((Fixups.empty() || Fixup.getOffset() >= Fixups.back().getOffset()) && - "Fixups must be added in order!"); - Fixups.push_back(Fixup); + virtual uint8_t getBundlePadding() const { + return BundlePadding; } - std::vector &getFixups() { return Fixups; } - const std::vector &getFixups() const { return Fixups; } + virtual void setBundlePadding(uint8_t N) { + BundlePadding = N; + } + + static bool classof(const MCFragment *F) { + MCFragment::FragmentType Kind = F->getKind(); + switch (Kind) { + default: + return false; + case MCFragment::FT_Relaxable: + case MCFragment::FT_CompactEncodedInst: + case MCFragment::FT_Data: + return true; + } + } +}; + +/// Interface implemented by fragments that contain encoded instructions and/or +/// data and also have fixups registered. +/// +class MCEncodedFragmentWithFixups : public MCEncodedFragment { + virtual void anchor(); + +public: + MCEncodedFragmentWithFixups(MCFragment::FragmentType FType, + MCSectionData *SD = 0) + : MCEncodedFragment(FType, SD) + { + } + + virtual ~MCEncodedFragmentWithFixups(); + + typedef SmallVectorImpl::const_iterator const_fixup_iterator; + typedef SmallVectorImpl::iterator fixup_iterator; + + virtual SmallVectorImpl &getFixups() = 0; + virtual const SmallVectorImpl &getFixups() const = 0; + + virtual fixup_iterator fixup_begin() = 0; + virtual const_fixup_iterator fixup_begin() const = 0; + virtual fixup_iterator fixup_end() = 0; + virtual const_fixup_iterator fixup_end() const = 0; + + static bool classof(const MCFragment *F) { + MCFragment::FragmentType Kind = F->getKind(); + return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data; + } +}; + +/// Fragment for data and encoded instructions. +/// +class MCDataFragment : public MCEncodedFragmentWithFixups { + virtual void anchor(); + + /// \brief Does this fragment contain encoded instructions anywhere in it? + bool HasInstructions; + + /// \brief Should this fragment be aligned to the end of a bundle? 
+ bool AlignToBundleEnd; + + SmallVector Contents; + + /// Fixups - The list of fixups in this fragment. + SmallVector Fixups; +public: + MCDataFragment(MCSectionData *SD = 0) + : MCEncodedFragmentWithFixups(FT_Data, SD), + HasInstructions(false), AlignToBundleEnd(false) + { + } + + virtual SmallVectorImpl &getContents() { return Contents; } + virtual const SmallVectorImpl &getContents() const { return Contents; } + + SmallVectorImpl &getFixups() { + return Fixups; + } + + const SmallVectorImpl &getFixups() const { + return Fixups; + } + + virtual bool hasInstructions() const { return HasInstructions; } + virtual void setHasInstructions(bool V) { HasInstructions = V; } + + virtual bool alignToBundleEnd() const { return AlignToBundleEnd; } + virtual void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; } fixup_iterator fixup_begin() { return Fixups.begin(); } const_fixup_iterator fixup_begin() const { return Fixups.begin(); } @@ -142,60 +240,79 @@ public: fixup_iterator fixup_end() {return Fixups.end();} const_fixup_iterator fixup_end() const {return Fixups.end();} - size_t fixup_size() const { return Fixups.size(); } - - /// @} - static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Data; } }; -// FIXME: This current incarnation of MCInstFragment doesn't make much sense, as -// it is almost entirely a duplicate of MCDataFragment. If we decide to stick -// with this approach (as opposed to making MCInstFragment a very light weight -// object with just the MCInst and a code size, then we should just change -// MCDataFragment to have an optional MCInst at its end. -class MCInstFragment : public MCFragment { +/// This is a compact (memory-size-wise) fragment for holding an encoded +/// instruction (non-relaxable) that has no fixups registered. When applicable, +/// it can be used instead of MCDataFragment and lead to lower memory +/// consumption. 
+/// +class MCCompactEncodedInstFragment : public MCEncodedFragment { + virtual void anchor(); + + /// \brief Should this fragment be aligned to the end of a bundle? + bool AlignToBundleEnd; + + SmallVector Contents; +public: + MCCompactEncodedInstFragment(MCSectionData *SD = 0) + : MCEncodedFragment(FT_CompactEncodedInst, SD), AlignToBundleEnd(false) + { + } + + virtual bool hasInstructions() const { + return true; + } + + virtual SmallVectorImpl &getContents() { return Contents; } + virtual const SmallVectorImpl &getContents() const { return Contents; } + + virtual bool alignToBundleEnd() const { return AlignToBundleEnd; } + virtual void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; } + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_CompactEncodedInst; + } +}; + +/// A relaxable fragment holds on to its MCInst, since it may need to be +/// relaxed during the assembler layout and relaxation stage. +/// +class MCRelaxableFragment : public MCEncodedFragmentWithFixups { virtual void anchor(); /// Inst - The instruction this is a fragment for. MCInst Inst; - /// Code - Binary data for the currently encoded instruction. - SmallString<8> Code; + /// Contents - Binary data for the currently encoded instruction. + SmallVector Contents; /// Fixups - The list of fixups in this fragment. 
SmallVector Fixups; public: - typedef SmallVectorImpl::const_iterator const_fixup_iterator; - typedef SmallVectorImpl::iterator fixup_iterator; - -public: - MCInstFragment(const MCInst &_Inst, MCSectionData *SD = 0) - : MCFragment(FT_Inst, SD), Inst(_Inst) { + MCRelaxableFragment(const MCInst &_Inst, MCSectionData *SD = 0) + : MCEncodedFragmentWithFixups(FT_Relaxable, SD), Inst(_Inst) { } - /// @name Accessors - /// @{ + virtual SmallVectorImpl &getContents() { return Contents; } + virtual const SmallVectorImpl &getContents() const { return Contents; } - SmallVectorImpl &getCode() { return Code; } - const SmallVectorImpl &getCode() const { return Code; } - - unsigned getInstSize() const { return Code.size(); } - - MCInst &getInst() { return Inst; } const MCInst &getInst() const { return Inst; } - void setInst(const MCInst& Value) { Inst = Value; } - /// @} - /// @name Fixup Access - /// @{ + SmallVectorImpl &getFixups() { + return Fixups; + } - SmallVectorImpl &getFixups() { return Fixups; } - const SmallVectorImpl &getFixups() const { return Fixups; } + const SmallVectorImpl &getFixups() const { + return Fixups; + } + + virtual bool hasInstructions() const { return true; } fixup_iterator fixup_begin() { return Fixups.begin(); } const_fixup_iterator fixup_begin() const { return Fixups.begin(); } @@ -203,12 +320,8 @@ public: fixup_iterator fixup_end() {return Fixups.end();} const_fixup_iterator fixup_end() const {return Fixups.end();} - size_t fixup_size() const { return Fixups.size(); } - - /// @} - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Inst; + return F->getKind() == MCFragment::FT_Relaxable; } }; @@ -442,6 +555,12 @@ public: typedef FragmentListType::const_reverse_iterator const_reverse_iterator; typedef FragmentListType::reverse_iterator reverse_iterator; + /// \brief Express the state of bundle locked groups while emitting code. 
+ enum BundleLockStateType { + NotBundleLocked, + BundleLocked, + BundleLockedAlignToEnd + }; private: FragmentListType Fragments; const MCSection *Section; @@ -455,6 +574,13 @@ private: /// Alignment - The maximum alignment seen in this section. unsigned Alignment; + /// \brief Keeping track of bundle-locked state. + BundleLockStateType BundleLockState; + + /// \brief We've seen a bundle_lock directive but not its first instruction + /// yet. + bool BundleGroupBeforeFirstInst; + /// @name Assembler Backend Data /// @{ // @@ -507,6 +633,26 @@ public: bool empty() const { return Fragments.empty(); } + bool isBundleLocked() const { + return BundleLockState != NotBundleLocked; + } + + BundleLockStateType getBundleLockState() const { + return BundleLockState; + } + + void setBundleLockState(BundleLockStateType NewState) { + BundleLockState = NewState; + } + + bool isBundleGroupBeforeFirstInst() const { + return BundleGroupBeforeFirstInst; + } + + void setBundleGroupBeforeFirstInst(bool IsFirst) { + BundleGroupBeforeFirstInst = IsFirst; + } + void dump(); /// @} @@ -703,6 +849,10 @@ private: std::vector IndirectSymbols; std::vector DataRegions; + + /// The list of linker options to propagate into the object file. + std::vector > LinkerOptions; + /// The set of function symbols for which a .thumb_func directive has /// been seen. // @@ -712,10 +862,21 @@ private: // refactoring too. SmallPtrSet ThumbFuncs; + /// \brief The bundle alignment size currently set in the assembler. + /// + /// By default it's 0, which means bundling is disabled. + unsigned BundleAlignSize; + unsigned RelaxAll : 1; unsigned NoExecStack : 1; unsigned SubsectionsViaSymbols : 1; + /// ELF specific e_header flags + // It would be good if there were an MCELFAssembler class to hold this. + // ELF header flags are used both by the integrated and standalone assemblers. + // Access to the flags is necessary in cases where assembler directives affect + // which flags to be set. 
+ unsigned ELFHeaderEFlags; private: /// Evaluate a fixup to a relocatable expression and the value which should be /// placed into the fixup. @@ -736,20 +897,22 @@ private: /// Check whether a fixup can be satisfied, or whether it needs to be relaxed /// (increased in size, in order to hold its value correctly). - bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCInstFragment *DF, + bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const; /// Check whether the given fragment needs relaxation. - bool fragmentNeedsRelaxation(const MCInstFragment *IF, + bool fragmentNeedsRelaxation(const MCRelaxableFragment *IF, const MCAsmLayout &Layout) const; - /// layoutOnce - Perform one layout iteration and return true if any offsets + /// \brief Perform one layout iteration and return true if any offsets /// were adjusted. bool layoutOnce(MCAsmLayout &Layout); + /// \brief Perform one layout iteration of the given section and return true + /// if any offsets were adjusted. bool layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD); - bool relaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF); + bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF); bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF); @@ -791,6 +954,10 @@ public: /// Flag a function symbol as the target of a .thumb_func directive. void setIsThumbFunc(const MCSymbol *Func) { ThumbFuncs.insert(Func); } + /// ELF e_header flags + unsigned getELFHeaderEFlags() const {return ELFHeaderEFlags;} + void setELFHeaderEFlags(unsigned Flags) { ELFHeaderEFlags = Flags;} + public: /// Construct a new assembler instance. 
/// @@ -805,6 +972,10 @@ public: raw_ostream &OS); ~MCAssembler(); + /// Reuse an assembler instance + /// + void reset(); + MCContext &getContext() const { return Context; } MCAsmBackend &getBackend() const { return Backend; } @@ -832,6 +1003,20 @@ public: bool getNoExecStack() const { return NoExecStack; } void setNoExecStack(bool Value) { NoExecStack = Value; } + bool isBundlingEnabled() const { + return BundleAlignSize != 0; + } + + unsigned getBundleAlignSize() const { + return BundleAlignSize; + } + + void setBundleAlignSize(unsigned Size) { + assert((Size == 0 || !(Size & (Size - 1))) && + "Expect a power-of-two bundle align size"); + BundleAlignSize = Size; + } + /// @name Section List Access /// @{ @@ -888,6 +1073,14 @@ public: size_t indirect_symbol_size() const { return IndirectSymbols.size(); } + /// @} + /// @name Linker Option List Access + /// @{ + + std::vector > &getLinkerOptions() { + return LinkerOptions; + } + /// @} /// @name Data Region List Access /// @{ diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h index 682cf7cd76c6..ae5bf0bc2069 100644 --- a/include/llvm/MC/MCAtom.h +++ b/include/llvm/MC/MCAtom.h @@ -46,8 +46,8 @@ class MCAtom { : Type(T), Parent(P), Begin(B), End(E) { } public: - bool isTextAtom() { return Type == TextAtom; } - bool isDataAtom() { return Type == DataAtom; } + bool isTextAtom() const { return Type == TextAtom; } + bool isDataAtom() const { return Type == DataAtom; } void addInst(const MCInst &I, uint64_t Address, unsigned Size); void addData(const MCData &D); diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h index 057489090293..9bfa08eb5d01 100644 --- a/include/llvm/MC/MCCodeEmitter.h +++ b/include/llvm/MC/MCCodeEmitter.h @@ -29,6 +29,9 @@ protected: // Can only create subclasses. public: virtual ~MCCodeEmitter(); + /// Lifetime management + virtual void reset() { } + /// EncodeInstruction - Encode the given \p Inst to bytes on the output /// stream \p OS. 
virtual void EncodeInstruction(const MCInst &Inst, raw_ostream &OS, diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 5a8830cb66ce..0db3dee2ff05 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -10,13 +10,15 @@ #ifndef LLVM_MC_MCCONTEXT_H #define LLVM_MC_MCCONTEXT_H -#include "llvm/MC/SectionKind.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include #include // FIXME: Shouldn't be needed. namespace llvm { @@ -94,9 +96,19 @@ namespace llvm { /// .secure_log_reset appearing between them. bool SecureLogUsed; + /// The compilation directory to use for DW_AT_comp_dir. + std::string CompilationDir; + + /// The main file name if passed in explicitly. + std::string MainFileName; + /// The dwarf file and directory tables from the dwarf .file directive. - std::vector MCDwarfFiles; - std::vector MCDwarfDirs; + /// We now emit a line table for each compile unit. To reduce the prologue + /// size of each line table, the files and directories used by each compile + /// unit are separated. + typedef std::map > MCDwarfFilesMap; + MCDwarfFilesMap MCDwarfFilesCUMap; + std::map > MCDwarfDirsCUMap; /// The current dwarf line information from the last dwarf .loc directive. MCDwarfLoc CurrentDwarfLoc; @@ -123,6 +135,10 @@ namespace llvm { /// non-empty. StringRef DwarfDebugFlags; + /// The string to embed in as the dwarf AT_producer for the compile unit, if + /// non-empty. + StringRef DwarfDebugProducer; + /// Honor temporary labels, this is useful for debugging semantic /// differences between temporary and non-temporary labels (primarily on /// Darwin). 
@@ -134,14 +150,22 @@ namespace llvm { /// We need a deterministic iteration order, so we remember the order /// the elements were added. std::vector MCLineSectionOrder; + /// The Compile Unit ID that we are currently processing. + unsigned DwarfCompileUnitID; + /// The line table start symbol for each Compile Unit. + DenseMap MCLineTableSymbols; void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap; + /// Do automatic reset in destructor + bool AutoReset; + MCSymbol *CreateSymbol(StringRef Name); public: explicit MCContext(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0); + const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0, + bool DoAutoReset = true); ~MCContext(); const SourceMgr *getSourceManager() const { return SrcMgr; } @@ -154,6 +178,15 @@ namespace llvm { void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; } + /// @name Module Lifetime Management + /// @{ + + /// reset - return object to right after construction state to prepare + /// to process a new module + void reset(); + + /// @} + /// @name Symbol Management /// @{ @@ -235,21 +268,45 @@ namespace llvm { /// @name Dwarf Management /// @{ + /// \brief Get the compilation directory for DW_AT_comp_dir + /// This can be overridden by clients which want to control the reported + /// compilation directory and have it be something other than the current + /// working directory. + const std::string &getCompilationDir() const { return CompilationDir; } + + /// \brief Set the compilation directory for DW_AT_comp_dir + /// Override the default (CWD) compilation directory. + void setCompilationDir(StringRef S) { CompilationDir = S.str(); } + + /// \brief Get the main file name for use in error messages and debug + /// info. This can be set to ensure we've got the correct file name + /// after preprocessing or for -save-temps. 
+ const std::string &getMainFileName() const { return MainFileName; } + + /// \brief Set the main file name and override the default. + void setMainFileName(StringRef S) { MainFileName = S.str(); } + /// GetDwarfFile - creates an entry in the dwarf file and directory tables. unsigned GetDwarfFile(StringRef Directory, StringRef FileName, - unsigned FileNumber); + unsigned FileNumber, unsigned CUID); - bool isValidDwarfFileNumber(unsigned FileNumber); + bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0); bool hasDwarfFiles() const { - return !MCDwarfFiles.empty(); + // Traverse MCDwarfFilesCUMap and check whether each entry is empty. + MCDwarfFilesMap::const_iterator MapB, MapE; + for (MapB = MCDwarfFilesCUMap.begin(), MapE = MCDwarfFilesCUMap.end(); + MapB != MapE; MapB++) + if (!MapB->second.empty()) + return true; + return false; } - const std::vector &getMCDwarfFiles() { - return MCDwarfFiles; + const SmallVectorImpl &getMCDwarfFiles(unsigned CUID = 0) { + return MCDwarfFilesCUMap[CUID]; } - const std::vector &getMCDwarfDirs() { - return MCDwarfDirs; + const SmallVectorImpl &getMCDwarfDirs(unsigned CUID = 0) { + return MCDwarfDirsCUMap[CUID]; } const DenseMap @@ -263,6 +320,25 @@ namespace llvm { MCLineSections[Sec] = Line; MCLineSectionOrder.push_back(Sec); } + unsigned getDwarfCompileUnitID() { + return DwarfCompileUnitID; + } + void setDwarfCompileUnitID(unsigned CUIndex) { + DwarfCompileUnitID = CUIndex; + } + const DenseMap &getMCLineTableSymbols() const { + return MCLineTableSymbols; + } + MCSymbol *getMCLineTableSymbol(unsigned ID) const { + DenseMap::const_iterator CIter = + MCLineTableSymbols.find(ID); + if (CIter == MCLineTableSymbols.end()) + return NULL; + return CIter->second; + } + void setMCLineTableSymbol(MCSymbol *Sym, unsigned ID) { + MCLineTableSymbols[ID] = Sym; + } /// setCurrentDwarfLoc - saves the information from the currently parsed /// dwarf .loc directive and sets DwarfLocSeen. 
When the next instruction @@ -309,6 +385,9 @@ namespace llvm { void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; } StringRef getDwarfDebugFlags() { return DwarfDebugFlags; } + void setDwarfDebugProducer(StringRef S) { DwarfDebugProducer = S; } + StringRef getDwarfDebugProducer() { return DwarfDebugProducer; } + /// @} char *getSecureLogFile() { return SecureLogFile; } diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h index 53a9ce0a3648..36fbcb02d9f6 100644 --- a/include/llvm/MC/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler.h @@ -6,11 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -#ifndef MCDISASSEMBLER_H -#define MCDISASSEMBLER_H +#ifndef LLVM_MC_MCDISASSEMBLER_H +#define LLVM_MC_MCDISASSEMBLER_H -#include "llvm/Support/DataTypes.h" #include "llvm-c/Disassembler.h" +#include "llvm/Support/DataTypes.h" namespace llvm { @@ -20,8 +20,6 @@ class MemoryObject; class raw_ostream; class MCContext; -struct EDInstInfo; - /// MCDisassembler - Superclass for all disassemblers. Consumes a memory region /// and provides an array of assembly instructions. class MCDisassembler { @@ -84,14 +82,6 @@ public: raw_ostream &vStream, raw_ostream &cStream) const = 0; - /// getEDInfo - Returns the enhanced instruction information corresponding to - /// the disassembler. - /// - /// @return - An array of instruction information, with one entry for - /// each MCInst opcode this disassembler returns. - /// NULL if there is no info for this target. - virtual const EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; } - private: // // Hooks for symbolic disassembly via the public 'C' interface. 
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 8fc437f3e691..1a392e8755ee 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -16,10 +16,10 @@ #define LLVM_MC_MCDWARF_H #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/raw_ostream.h" +#include #include namespace llvm { @@ -187,29 +187,43 @@ namespace llvm { MCLineSection() {} // addLineEntry - adds an entry to this MCLineSection's line entries - void addLineEntry(const MCLineEntry &LineEntry) { - MCLineEntries.push_back(LineEntry); + void addLineEntry(const MCLineEntry &LineEntry, unsigned CUID) { + MCLineDivisions[CUID].push_back(LineEntry); } typedef std::vector MCLineEntryCollection; typedef MCLineEntryCollection::iterator iterator; typedef MCLineEntryCollection::const_iterator const_iterator; + typedef std::map MCLineDivisionMap; private: - MCLineEntryCollection MCLineEntries; + // A collection of MCLineEntry for each Compile Unit ID. + MCLineDivisionMap MCLineDivisions; public: - const MCLineEntryCollection *getMCLineEntries() const { - return &MCLineEntries; + // Returns whether MCLineSection contains entries for a given Compile + // Unit ID. + bool containEntriesForID(unsigned CUID) const { + return MCLineDivisions.count(CUID); + } + // Returns the collection of MCLineEntry for a given Compile Unit ID. + const MCLineEntryCollection &getMCLineEntries(unsigned CUID) const { + MCLineDivisionMap::const_iterator CIter = MCLineDivisions.find(CUID); + assert(CIter != MCLineDivisions.end()); + return CIter->second; } }; class MCDwarfFileTable { public: // - // This emits the Dwarf file and the line tables. + // This emits the Dwarf file and the line tables for all Compile Units. 
// static const MCSymbol *Emit(MCStreamer *MCOS); + // + // This emits the Dwarf file and the line tables for a given Compile Unit. + // + static const MCSymbol *EmitCU(MCStreamer *MCOS, unsigned ID); }; class MCDwarfLineAddr { @@ -266,42 +280,115 @@ namespace llvm { class MCCFIInstruction { public: - enum OpType { SameValue, RememberState, RestoreState, Move, RelMove, Escape, - Restore}; + enum OpType { OpSameValue, OpRememberState, OpRestoreState, OpOffset, + OpDefCfaRegister, OpDefCfaOffset, OpDefCfa, OpRelOffset, + OpAdjustCfaOffset, OpEscape, OpRestore, OpUndefined, + OpRegister }; private: OpType Operation; MCSymbol *Label; - // Move to & from location. - MachineLocation Destination; - MachineLocation Source; + unsigned Register; + union { + int Offset; + unsigned Register2; + }; std::vector Values; + + MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V) : + Operation(Op), Label(L), Register(R), Offset(O), + Values(V.begin(), V.end()) { + assert(Op != OpRegister); + } + + MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2) : + Operation(Op), Label(L), Register(R1), Register2(R2) { + assert(Op == OpRegister); + } + public: - MCCFIInstruction(OpType Op, MCSymbol *L) - : Operation(Op), Label(L) { - assert(Op == RememberState || Op == RestoreState); + static MCCFIInstruction + createOffset(MCSymbol *L, unsigned Register, int Offset) { + return MCCFIInstruction(OpOffset, L, Register, Offset, ""); } - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned Register) - : Operation(Op), Label(L), Destination(Register) { - assert(Op == SameValue || Op == Restore); + + static MCCFIInstruction + createDefCfaRegister(MCSymbol *L, unsigned Register) { + return MCCFIInstruction(OpDefCfaRegister, L, Register, 0, ""); } - MCCFIInstruction(MCSymbol *L, const MachineLocation &D, - const MachineLocation &S) - : Operation(Move), Label(L), Destination(D), Source(S) { + + static MCCFIInstruction createDefCfaOffset(MCSymbol *L, int Offset) { + return 
MCCFIInstruction(OpDefCfaOffset, L, 0, -Offset, ""); } - MCCFIInstruction(OpType Op, MCSymbol *L, const MachineLocation &D, - const MachineLocation &S) - : Operation(Op), Label(L), Destination(D), Source(S) { - assert(Op == RelMove); + + static MCCFIInstruction + createDefCfa(MCSymbol *L, unsigned Register, int Offset) { + return MCCFIInstruction(OpDefCfa, L, Register, -Offset, ""); } - MCCFIInstruction(OpType Op, MCSymbol *L, StringRef Vals) - : Operation(Op), Label(L), Values(Vals.begin(), Vals.end()) { - assert(Op == Escape); + + static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register) { + return MCCFIInstruction(OpUndefined, L, Register, 0, ""); } + + static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register) { + return MCCFIInstruction(OpRestore, L, Register, 0, ""); + } + + static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register) { + return MCCFIInstruction(OpSameValue, L, Register, 0, ""); + } + + static MCCFIInstruction createRestoreState(MCSymbol *L) { + return MCCFIInstruction(OpRestoreState, L, 0, 0, ""); + } + + static MCCFIInstruction createRememberState(MCSymbol *L) { + return MCCFIInstruction(OpRememberState, L, 0, 0, ""); + } + + static MCCFIInstruction + createRelOffset(MCSymbol *L, unsigned Register, int Offset) { + return MCCFIInstruction(OpRelOffset, L, Register, Offset, ""); + } + + static MCCFIInstruction + createAdjustCfaOffset(MCSymbol *L, int Adjustment) { + return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, ""); + } + + static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals) { + return MCCFIInstruction(OpEscape, L, 0, 0, Vals); + } + + static MCCFIInstruction + createRegister(MCSymbol *L, unsigned Register1, unsigned Register2) { + return MCCFIInstruction(OpRegister, L, Register1, Register2); + } + OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } - const MachineLocation &getDestination() const { return Destination; } - const 
MachineLocation &getSource() const { return Source; } + + unsigned getRegister() const { + assert(Operation == OpDefCfa || Operation == OpOffset || + Operation == OpRestore || Operation == OpUndefined || + Operation == OpSameValue || Operation == OpDefCfaRegister || + Operation == OpRelOffset || Operation == OpRegister); + return Register; + } + + unsigned getRegister2() const { + assert(Operation == OpRegister); + return Register2; + } + + int getOffset() const { + assert(Operation == OpDefCfa || Operation == OpOffset || + Operation == OpRelOffset || Operation == OpDefCfaOffset || + Operation == OpAdjustCfaOffset); + return Offset; + } + const StringRef getValues() const { + assert(Operation == OpEscape); return StringRef(&Values[0], Values.size()); } }; diff --git a/lib/MC/MCELF.h b/include/llvm/MC/MCELF.h similarity index 90% rename from lib/MC/MCELF.h rename to include/llvm/MC/MCELF.h index e08f1e65429a..7e59911a89c3 100644 --- a/lib/MC/MCELF.h +++ b/include/llvm/MC/MCELF.h @@ -28,6 +28,8 @@ class MCELF { static unsigned GetType(const MCSymbolData &SD); static void SetVisibility(MCSymbolData &SD, unsigned Visibility); static unsigned GetVisibility(MCSymbolData &SD); + static void setOther(MCSymbolData &SD, unsigned Other); + static unsigned getOther(MCSymbolData &SD); }; } diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 38cdc7293ba0..a59776d5cdaa 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -79,7 +79,6 @@ public: virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const = 0; - virtual unsigned getEFlags() const; virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h new file mode 100644 index 000000000000..6fb2d22be2e7 --- /dev/null +++ 
b/include/llvm/MC/MCELFStreamer.h @@ -0,0 +1,125 @@ +//===- MCELFStreamer.h - MCStreamer ELF Object File Interface ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFSTREAMER_H +#define LLVM_MC_MCELFSTREAMER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/DataTypes.h" +#include + +namespace llvm { +class MCAsmBackend; +class MCAssembler; +class MCCodeEmitter; +class MCExpr; +class MCInst; +class MCSymbol; +class MCSymbolData; +class raw_ostream; + +class MCELFStreamer : public MCObjectStreamer { +protected: + MCELFStreamer(StreamerKind Kind, MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Kind, Context, TAB, OS, Emitter) {} + +public: + MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter) + : MCObjectStreamer(SK_ELFStreamer, Context, TAB, OS, Emitter) {} + + MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter, MCAssembler *Assembler) + : MCObjectStreamer(SK_ELFStreamer, Context, TAB, OS, Emitter, + Assembler) {} + + virtual ~MCELFStreamer(); + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void InitToTextSection(); + virtual void ChangeSection(const MCSection *Section); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitDebugLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void 
EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + + virtual MCSymbolData &getOrCreateSymbolData(MCSymbol *Symbol); + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + uint64_t Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace); + + virtual void EmitFileDirective(StringRef Filename); + + virtual void EmitTCEntry(const MCSymbol &S); + + virtual void EmitValueToAlignment(unsigned, int64_t, unsigned, unsigned); + + virtual void FinishImpl(); + /// @} + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_ELFStreamer || S->getKind() == SK_ARMELFStreamer; + } + +private: + virtual void EmitInstToFragment(const MCInst &Inst); + virtual void EmitInstToData(const MCInst &Inst); + + virtual void EmitBundleAlignMode(unsigned AlignPow2); + virtual void EmitBundleLock(bool AlignToEnd); + virtual void EmitBundleUnlock(); + + void fixSymbolsInTLSFixups(const MCExpr *expr); + + struct LocalCommon { + MCSymbolData *SD; + uint64_t Size; + unsigned ByteAlignment; + }; + + std::vector LocalCommons; + + SmallPtrSet BindingExplicitlySet; + + + void SetSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind); + void SetSectionData(); + void SetSectionText(); + void SetSectionBss(); +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCExpr.h 
b/include/llvm/MC/MCExpr.h index 1007aa526493..b5bfed18eca4 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -160,6 +160,7 @@ public: VK_TLVP, // Mach-O thread local variable relocation VK_SECREL, // FIXME: We'd really like to use the generic Kinds listed above for these. + VK_ARM_NONE, VK_ARM_PLT, // ARM-style PLT references. i.e., (PLT) instead of @PLT VK_ARM_TLSGD, // ditto for TLSGD, GOT, GOTOFF, TPOFF and GOTTPOFF VK_ARM_GOT, @@ -168,15 +169,29 @@ public: VK_ARM_GOTTPOFF, VK_ARM_TARGET1, VK_ARM_TARGET2, + VK_ARM_PREL31, VK_PPC_TOC, // TOC base VK_PPC_TOC_ENTRY, // TOC entry VK_PPC_DARWIN_HA16, // ha16(symbol) VK_PPC_DARWIN_LO16, // lo16(symbol) VK_PPC_GAS_HA16, // symbol@ha - VK_PPC_GAS_LO16, // symbol@l + VK_PPC_GAS_LO16, // symbol@l VK_PPC_TPREL16_HA, // symbol@tprel@ha VK_PPC_TPREL16_LO, // symbol@tprel@l + VK_PPC_DTPREL16_HA, // symbol@dtprel@ha + VK_PPC_DTPREL16_LO, // symbol@dtprel@l + VK_PPC_TOC16_HA, // symbol@toc@ha + VK_PPC_TOC16_LO, // symbol@toc@l + VK_PPC_GOT_TPREL16_HA, // symbol@got@tprel@ha + VK_PPC_GOT_TPREL16_LO, // symbol@got@tprel@l + VK_PPC_TLS, // symbol@tls + VK_PPC_GOT_TLSGD16_HA, // symbol@got@tlsgd@ha + VK_PPC_GOT_TLSGD16_LO, // symbol@got@tlsgd@l + VK_PPC_TLSGD, // symbol@tlsgd + VK_PPC_GOT_TLSLD16_HA, // symbol@got@tlsld@ha + VK_PPC_GOT_TLSLD16_LO, // symbol@got@tlsld@l + VK_PPC_TLSLD, // symbol@tlsld VK_Mips_GPREL, VK_Mips_GOT_CALL, @@ -457,6 +472,8 @@ public: virtual void AddValueSymbols(MCAssembler *) const = 0; virtual const MCSection *FindAssociatedSection() const = 0; + virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0; + static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h index 22b3c32abde9..ad99943df2c3 100644 --- a/include/llvm/MC/MCFixedLenDisassembler.h +++ b/include/llvm/MC/MCFixedLenDisassembler.h @@ -8,8 +8,8 @@ 
//===----------------------------------------------------------------------===// // Fixed length disassembler decoder state machine driver. //===----------------------------------------------------------------------===// -#ifndef MCFIXEDLENDISASSEMBLER_H -#define MCFIXEDLENDISASSEMBLER_H +#ifndef LLVM_MC_MCFIXEDLENDISASSEMBLER_H +#define LLVM_MC_MCFIXEDLENDISASSEMBLER_H namespace llvm { diff --git a/include/llvm/MC/MCInstBuilder.h b/include/llvm/MC/MCInstBuilder.h new file mode 100644 index 000000000000..c5acb26eecac --- /dev/null +++ b/include/llvm/MC/MCInstBuilder.h @@ -0,0 +1,68 @@ +//===-- llvm/MC/MCInstBuilder.h - Simplify creation of MCInsts --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MCInstBuilder class for convenient creation of +// MCInsts. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCINSTBUILDER_H +#define LLVM_MC_MCINSTBUILDER_H + +#include "llvm/MC/MCInst.h" + +namespace llvm { + +class MCInstBuilder { + MCInst Inst; + +public: + /// \brief Create a new MCInstBuilder for an MCInst with a specific opcode. + MCInstBuilder(unsigned Opcode) { + Inst.setOpcode(Opcode); + } + + /// \brief Add a new register operand. + MCInstBuilder &addReg(unsigned Reg) { + Inst.addOperand(MCOperand::CreateReg(Reg)); + return *this; + } + + /// \brief Add a new integer immediate operand. + MCInstBuilder &addImm(int64_t Val) { + Inst.addOperand(MCOperand::CreateImm(Val)); + return *this; + } + + /// \brief Add a new floating point immediate operand. + MCInstBuilder &addFPImm(double Val) { + Inst.addOperand(MCOperand::CreateFPImm(Val)); + return *this; + } + + /// \brief Add a new MCExpr operand. 
+ MCInstBuilder &addExpr(const MCExpr *Val) { + Inst.addOperand(MCOperand::CreateExpr(Val)); + return *this; + } + + /// \brief Add a new MCInst operand. + MCInstBuilder &addInst(const MCInst *Val) { + Inst.addOperand(MCOperand::CreateInst(Val)); + return *this; + } + + operator MCInst&() { + return Inst; + } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h index 3b9420a40389..a18cbd94bbbf 100644 --- a/include/llvm/MC/MCInstPrinter.h +++ b/include/llvm/MC/MCInstPrinter.h @@ -10,6 +10,9 @@ #ifndef LLVM_MC_MCINSTPRINTER_H #define LLVM_MC_MCINSTPRINTER_H +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Format.h" + namespace llvm { class MCInst; class raw_ostream; @@ -36,13 +39,16 @@ protected: /// True if we are printing marked up assembly. bool UseMarkup; + /// True if we are printing immediates as hex. + bool PrintImmHex; + /// Utility function for printing annotations. void printAnnotation(raw_ostream &OS, StringRef Annot); public: MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii, const MCRegisterInfo &mri) : CommentStream(0), MAI(mai), MII(mii), MRI(mri), AvailableFeatures(0), - UseMarkup(0) {} + UseMarkup(0), PrintImmHex(0) {} virtual ~MCInstPrinter(); @@ -70,6 +76,12 @@ public: /// Utility functions to make adding mark ups simpler. StringRef markup(StringRef s) const; StringRef markup(StringRef a, StringRef b) const; + + bool getPrintImmHex() const { return PrintImmHex; } + void setPrintImmHex(bool Value) { PrintImmHex = Value; } + + /// Utility function to print immediates in decimal or hex. 
+ format_object1 formatImm(const int64_t Value) const; }; } // namespace llvm diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index 02383f8bc658..9b5415add241 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -15,6 +15,8 @@ #ifndef LLVM_MC_MCINSTRDESC_H #define LLVM_MC_MCINSTRDESC_H +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -144,7 +146,7 @@ public: const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands - /// getOperandConstraint - Returns the value of the specific constraint if + /// \brief Returns the value of the specific constraint if /// it is set. Returns -1 if it is not set. int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const { @@ -156,12 +158,12 @@ public: return -1; } - /// getOpcode - Return the opcode number for this descriptor. + /// \brief Return the opcode number for this descriptor. unsigned getOpcode() const { return Opcode; } - /// getNumOperands - Return the number of declared MachineOperands for this + /// \brief Return the number of declared MachineOperands for this /// MachineInstruction. Note that variadic (isVariadic() returns true) /// instructions may have additional operands at the end of the list, and note /// that the machine instruction may include implicit register def/uses as @@ -170,7 +172,7 @@ public: return NumOperands; } - /// getNumDefs - Return the number of MachineOperands that are register + /// \brief Return the number of MachineOperands that are register /// definitions. Register definitions always occur at the start of the /// machine operand list. This is the number of "outs" in the .td file, /// and does not include implicit defs. @@ -178,11 +180,10 @@ public: return NumDefs; } - /// getFlags - Return flags of this instruction. 
- /// + /// \brief Return flags of this instruction. unsigned getFlags() const { return Flags; } - /// isVariadic - Return true if this instruction can have a variable number of + /// \brief Return true if this instruction can have a variable number of /// operands. In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are /// present). @@ -190,35 +191,37 @@ public: return Flags & (1 << MCID::Variadic); } - /// hasOptionalDef - Set if this instruction has an optional definition, e.g. + /// \brief Set if this instruction has an optional definition, e.g. /// ARM instructions which can set condition code if 's' bit is set. bool hasOptionalDef() const { return Flags & (1 << MCID::HasOptionalDef); } - /// isPseudo - Return true if this is a pseudo instruction that doesn't + /// \brief Return true if this is a pseudo instruction that doesn't /// correspond to a real machine instruction. /// bool isPseudo() const { return Flags & (1 << MCID::Pseudo); } + /// \brief Return true if the instruction is a return. bool isReturn() const { return Flags & (1 << MCID::Return); } + /// \brief Return true if the instruction is a call. bool isCall() const { return Flags & (1 << MCID::Call); } - /// isBarrier - Returns true if the specified instruction stops control flow + /// \brief Returns true if the specified instruction stops control flow /// from executing the instruction immediately following it. Examples include /// unconditional branches and return instructions. bool isBarrier() const { return Flags & (1 << MCID::Barrier); } - /// isTerminator - Returns true if this instruction part of the terminator for + /// \brief Returns true if this instruction part of the terminator for /// a basic block. Typically this is things like return and branch /// instructions. 
/// @@ -228,7 +231,7 @@ public: return Flags & (1 << MCID::Terminator); } - /// isBranch - Returns true if this is a conditional, unconditional, or + /// \brief Returns true if this is a conditional, unconditional, or /// indirect branch. Predicates below can be used to discriminate between /// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to /// get more information. @@ -236,13 +239,13 @@ public: return Flags & (1 << MCID::Branch); } - /// isIndirectBranch - Return true if this is an indirect branch, such as a + /// \brief Return true if this is an indirect branch, such as a /// branch through a register. bool isIndirectBranch() const { return Flags & (1 << MCID::IndirectBranch); } - /// isConditionalBranch - Return true if this is a branch which may fall + /// \brief Return true if this is a branch which may fall /// through to the next instruction or may transfer control flow to some other /// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more /// information about this branch. @@ -250,7 +253,7 @@ public: return isBranch() & !isBarrier() & !isIndirectBranch(); } - /// isUnconditionalBranch - Return true if this is a branch which always + /// \brief Return true if this is a branch which always /// transfers control flow to some other block. The /// TargetInstrInfo::AnalyzeBranch method can be used to get more information /// about this branch. @@ -258,38 +261,47 @@ public: return isBranch() & isBarrier() & !isIndirectBranch(); } - // isPredicable - Return true if this instruction has a predicate operand that - // controls execution. It may be set to 'always', or may be set to other - /// values. There are various methods in TargetInstrInfo that can be used to + /// \brief Return true if this is a branch or an instruction which directly + /// writes to the program counter. Considered 'may' affect rather than + /// 'does' affect as things like predication are not taken into account. 
+ bool mayAffectControlFlow(const MCInst &MI, const MCRegisterInfo &RI) const { + if (isBranch() || isCall() || isReturn() || isIndirectBranch()) + return true; + unsigned PC = RI.getProgramCounter(); + if (PC == 0) return false; + return hasDefOfPhysReg(MI, PC, RI); + } + + /// \brief Return true if this instruction has a predicate operand + /// that controls execution. It may be set to 'always', or may be set to other + /// values. There are various methods in TargetInstrInfo that can be used to /// control and modify the predicate in this instruction. bool isPredicable() const { return Flags & (1 << MCID::Predicable); } - /// isCompare - Return true if this instruction is a comparison. + /// \brief Return true if this instruction is a comparison. bool isCompare() const { return Flags & (1 << MCID::Compare); } - /// isMoveImmediate - Return true if this instruction is a move immediate + /// \brief Return true if this instruction is a move immediate /// (including conditional moves) instruction. bool isMoveImmediate() const { return Flags & (1 << MCID::MoveImm); } - /// isBitcast - Return true if this instruction is a bitcast instruction. - /// + /// \brief Return true if this instruction is a bitcast instruction. bool isBitcast() const { return Flags & (1 << MCID::Bitcast); } - /// isSelect - Return true if this is a select instruction. - /// + /// \brief Return true if this is a select instruction. bool isSelect() const { return Flags & (1 << MCID::Select); } - /// isNotDuplicable - Return true if this instruction cannot be safely + /// \brief Return true if this instruction cannot be safely /// duplicated. For example, if the instruction has a unique labels attached /// to it, duplicating it would cause multiple definition errors. 
bool isNotDuplicable() const { @@ -318,7 +330,7 @@ public: // Side Effect Analysis //===--------------------------------------------------------------------===// - /// mayLoad - Return true if this instruction could possibly read memory. + /// \brief Return true if this instruction could possibly read memory. /// Instructions with this flag set are not necessarily simple load /// instructions, they may load a value and modify it, for example. bool mayLoad() const { @@ -326,7 +338,7 @@ public: } - /// mayStore - Return true if this instruction could possibly modify memory. + /// \brief Return true if this instruction could possibly modify memory. /// Instructions with this flag set are not necessarily simple store /// instructions, they may store a modified value based on their operands, or /// may not actually modify anything, for example. @@ -459,8 +471,7 @@ public: return ImplicitUses; } - /// getNumImplicitUses - Return the number of implicit uses this instruction - /// has. + /// \brief Return the number of implicit uses this instruction has. unsigned getNumImplicitUses() const { if (ImplicitUses == 0) return 0; unsigned i = 0; @@ -482,8 +493,7 @@ public: return ImplicitDefs; } - /// getNumImplicitDefs - Return the number of implicit defs this instruction - /// has. + /// \brief Return the number of implicit defs this instruct has. unsigned getNumImplicitDefs() const { if (ImplicitDefs == 0) return 0; unsigned i = 0; @@ -491,7 +501,7 @@ public: return i; } - /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly + /// \brief Return true if this instruction implicitly /// uses the specified physical register. bool hasImplicitUseOfPhysReg(unsigned Reg) const { if (const uint16_t *ImpUses = ImplicitUses) @@ -500,31 +510,43 @@ public: return false; } - /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly + /// \brief Return true if this instruction implicitly /// defines the specified physical register. 
- bool hasImplicitDefOfPhysReg(unsigned Reg) const { + bool hasImplicitDefOfPhysReg(unsigned Reg, + const MCRegisterInfo *MRI = 0) const { if (const uint16_t *ImpDefs = ImplicitDefs) for (; *ImpDefs; ++ImpDefs) - if (*ImpDefs == Reg) return true; + if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs))) + return true; return false; } - /// getSchedClass - Return the scheduling class for this instruction. The + /// \brief Return true if this instruction defines the specified physical + /// register, either explicitly or implicitly. + bool hasDefOfPhysReg(const MCInst &MI, unsigned Reg, + const MCRegisterInfo &RI) const { + for (int i = 0, e = NumDefs; i != e; ++i) + if (MI.getOperand(i).isReg() && + RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg())) + return true; + return hasImplicitDefOfPhysReg(Reg, &RI); + } + + /// \brief Return the scheduling class for this instruction. The /// scheduling class is an index into the InstrItineraryData table. This /// returns zero if there is no known scheduling information for the /// instruction. - /// unsigned getSchedClass() const { return SchedClass; } - /// getSize - Return the number of bytes in the encoding of this instruction, + /// \brief Return the number of bytes in the encoding of this instruction, /// or zero if the encoding size cannot be known from the opcode. unsigned getSize() const { return Size; } - /// findFirstPredOperandIdx() - Find the index of the first operand in the + /// \brief Find the index of the first operand in the /// operand list that is used to represent the predicate. It returns -1 if /// none is found. 
int findFirstPredOperandIdx() const { diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index efaabfb9e88b..3c9a588d0413 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -45,6 +45,13 @@ protected: public: virtual ~MCMachObjectTargetWriter(); + /// @name Lifetime Management + /// @{ + + virtual void reset() {}; + + /// @} + /// @name Accessors /// @{ @@ -111,6 +118,13 @@ public: : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) { } + /// @name Lifetime management Methods + /// @{ + + virtual void reset(); + + /// @} + /// @name Utility Methods /// @{ @@ -182,6 +196,8 @@ public: void WriteLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset, uint32_t DataSize); + void WriteLinkerOptionsLoadCommand(const std::vector &Options); + // FIXME: We really need to improve the relocation validation. Basically, we // want to implement a separate computation which evaluates the relocation // entry as the linker would, and verifies that the resultant fixup value is @@ -223,8 +239,6 @@ public: /// ComputeSymbolTable - Compute the symbol table data /// /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, std::vector &LocalSymbolData, std::vector &ExternalSymbolData, diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 23e5513ae35e..c8d748420e31 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -84,15 +84,6 @@ protected: /// this is the section to emit them into. const MCSection *CompactUnwindSection; - /// DwarfAccelNamesSection, DwarfAccelObjCSection, - /// DwarfAccelNamespaceSection, DwarfAccelTypesSection - - /// If we use the DWARF accelerated hash tables then we want toe emit these - /// sections. 
- const MCSection *DwarfAccelNamesSection; - const MCSection *DwarfAccelObjCSection; - const MCSection *DwarfAccelNamespaceSection; - const MCSection *DwarfAccelTypesSection; - // Dwarf sections for debug info. If a target supports debug info, these must // be set. const MCSection *DwarfAbbrevSection; @@ -106,6 +97,28 @@ protected: const MCSection *DwarfARangesSection; const MCSection *DwarfRangesSection; const MCSection *DwarfMacroInfoSection; + // The pubnames section is no longer generated by default. The generation + // can be enabled by a compiler flag. + const MCSection *DwarfPubNamesSection; + + // DWARF5 Experimental Debug Info Sections + /// DwarfAccelNamesSection, DwarfAccelObjCSection, + /// DwarfAccelNamespaceSection, DwarfAccelTypesSection - + /// If we use the DWARF accelerated hash tables then we want to emit these + /// sections. + const MCSection *DwarfAccelNamesSection; + const MCSection *DwarfAccelObjCSection; + const MCSection *DwarfAccelNamespaceSection; + const MCSection *DwarfAccelTypesSection; + + /// These are used for the Fission separate debug information files. + const MCSection *DwarfInfoDWOSection; + const MCSection *DwarfAbbrevDWOSection; + const MCSection *DwarfStrDWOSection; + const MCSection *DwarfLineDWOSection; + const MCSection *DwarfLocDWOSection; + const MCSection *DwarfStrOffDWOSection; + const MCSection *DwarfAddrSection; // Extra TLS Variable Data section. If the target needs to put additional // information for a TLS variable, it'll go here. 
@@ -195,6 +208,24 @@ public: const MCSection *getCompactUnwindSection() const{ return CompactUnwindSection; } + const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } + const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; } + const MCSection *getDwarfLineSection() const { return DwarfLineSection; } + const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; } + const MCSection *getDwarfPubNamesSection() const{return DwarfPubNamesSection;} + const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;} + const MCSection *getDwarfDebugInlineSection() const { + return DwarfDebugInlineSection; + } + const MCSection *getDwarfStrSection() const { return DwarfStrSection; } + const MCSection *getDwarfLocSection() const { return DwarfLocSection; } + const MCSection *getDwarfARangesSection() const { return DwarfARangesSection;} + const MCSection *getDwarfRangesSection() const { return DwarfRangesSection; } + const MCSection *getDwarfMacroInfoSection() const { + return DwarfMacroInfoSection; + } + + // DWARF5 Experimental Debug Info Sections const MCSection *getDwarfAccelNamesSection() const { return DwarfAccelNamesSection; } @@ -207,21 +238,28 @@ public: const MCSection *getDwarfAccelTypesSection() const { return DwarfAccelTypesSection; } - const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } - const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; } - const MCSection *getDwarfLineSection() const { return DwarfLineSection; } - const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; } - const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;} - const MCSection *getDwarfDebugInlineSection() const { - return DwarfDebugInlineSection; + const MCSection *getDwarfInfoDWOSection() const { + return DwarfInfoDWOSection; } - const MCSection *getDwarfStrSection() const { return DwarfStrSection; } - const MCSection 
*getDwarfLocSection() const { return DwarfLocSection; } - const MCSection *getDwarfARangesSection() const { return DwarfARangesSection;} - const MCSection *getDwarfRangesSection() const { return DwarfRangesSection; } - const MCSection *getDwarfMacroInfoSection() const { - return DwarfMacroInfoSection; + const MCSection *getDwarfAbbrevDWOSection() const { + return DwarfAbbrevDWOSection; } + const MCSection *getDwarfStrDWOSection() const { + return DwarfStrDWOSection; + } + const MCSection *getDwarfLineDWOSection() const { + return DwarfLineDWOSection; + } + const MCSection *getDwarfLocDWOSection() const { + return DwarfLocDWOSection; + } + const MCSection *getDwarfStrOffDWOSection() const { + return DwarfStrOffDWOSection; + } + const MCSection *getDwarfAddrSection() const { + return DwarfAddrSection; + } + const MCSection *getTLSExtraDataSection() const { return TLSExtraDataSection; } diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index 08b00f1c478e..f06c49ff082a 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -38,13 +38,18 @@ class MCObjectStreamer : public MCStreamer { virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame); protected: - MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, + MCObjectStreamer(StreamerKind Kind, MCContext &Context, MCAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter); - MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, + MCObjectStreamer(StreamerKind Kind, MCContext &Context, MCAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter, MCAssembler *_Assembler); ~MCObjectStreamer(); +public: + /// state management + virtual void reset(); + +protected: MCSectionData *getCurrentSectionData() const { return CurSectionData; } @@ -64,6 +69,8 @@ public: /// @{ virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitDebugLabel(MCSymbol *Symbol); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); virtual void 
EmitValueImpl(const MCExpr *Value, unsigned Size, unsigned AddrSpace); virtual void EmitULEB128Value(const MCExpr *Value); @@ -71,8 +78,15 @@ public: virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); virtual void ChangeSection(const MCSection *Section); virtual void EmitInstruction(const MCInst &Inst); + + /// \brief Emit an instruction to a special fragment, because this instruction + /// can change its size during relaxation. virtual void EmitInstToFragment(const MCInst &Inst); - virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + + virtual void EmitBundleAlignMode(unsigned AlignPow2); + virtual void EmitBundleLock(bool AlignToEnd); + virtual void EmitBundleUnlock(); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace = 0); virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, @@ -89,10 +103,14 @@ public: virtual void EmitGPRel32Value(const MCExpr *Value); virtual void EmitGPRel64Value(const MCExpr *Value); virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, - unsigned AddrSpace); + unsigned AddrSpace = 0); virtual void FinishImpl(); /// @} + + static bool classof(const MCStreamer *S) { + return S->getKind() >= SK_ELFStreamer && S->getKind() <= SK_WinCOFFStreamer; + } }; } // end namespace llvm diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index 14fe75fd4c31..4939a3f1fb07 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -10,9 +10,10 @@ #ifndef LLVM_MC_MCOBJECTWRITER_H #define LLVM_MC_MCOBJECTWRITER_H -#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/raw_ostream.h" #include namespace llvm { @@ -51,6 +52,9 @@ protected: // Can only create subclasses. 
public: virtual ~MCObjectWriter(); + /// lifetime management + virtual void reset() { } + bool isLittleEndian() const { return IsLittleEndian; } raw_ostream &getStream() { return OS; } @@ -58,15 +62,15 @@ public: /// @name High-Level API /// @{ - /// Perform any late binding of symbols (for example, to assign symbol indices - /// for use when generating relocations). + /// \brief Perform any late binding of symbols (for example, to assign symbol + /// indices for use when generating relocations). /// /// This routine is called by the assembler after layout and relaxation is /// complete. virtual void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) = 0; - /// Record a relocation entry. + /// \brief Record a relocation entry. /// /// This routine is called by the assembler after layout and relaxation, and /// post layout binding. The implementation is responsible for storing @@ -96,8 +100,7 @@ public: bool InSet, bool IsPCRel) const; - - /// Write the object file. + /// \brief Write the object file. 
/// /// This routine is called by the assembler after layout and relaxation is /// complete, fixups have been evaluated and applied, and relocations @@ -173,7 +176,13 @@ public: OS << StringRef(Zeros, N % 16); } + void WriteBytes(const SmallVectorImpl &ByteVec, unsigned ZeroFillSize = 0) { + WriteBytes(StringRef(ByteVec.data(), ByteVec.size()), ZeroFillSize); + } + void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + // TODO: this version may need to go away once all fragment contents are + // converted to SmallVector assert((ZeroFillSize == 0 || Str.size () <= ZeroFillSize) && "data size greater than fill size, unexpected large write will occur"); OS << Str; diff --git a/include/llvm/MC/MCParser/AsmCond.h b/include/llvm/MC/MCParser/AsmCond.h index 92a115eb8038..a918b5600ed5 100644 --- a/include/llvm/MC/MCParser/AsmCond.h +++ b/include/llvm/MC/MCParser/AsmCond.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ASMCOND_H -#define ASMCOND_H +#ifndef LLVM_MC_MCPARSER_ASMCOND_H +#define LLVM_MC_MCPARSER_ASMCOND_H namespace llvm { diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index e102dfb82c4a..0dab31489fbb 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ASMLEXER_H -#define ASMLEXER_H +#ifndef LLVM_MC_MCPARSER_ASMLEXER_H +#define LLVM_MC_MCPARSER_ASMLEXER_H #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCParser/MCAsmLexer.h" diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h index 0a961d6d0971..53b380f12f71 100644 --- a/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/include/llvm/MC/MCParser/MCAsmLexer.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCASMLEXER_H -#define LLVM_MC_MCASMLEXER_H +#ifndef 
LLVM_MC_MCPARSER_MCASMLEXER_H +#define LLVM_MC_MCPARSER_MCASMLEXER_H #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -34,9 +34,6 @@ public: // Real values. Real, - // Register values (stored in IntVal). Only used by MCTargetAsmLexer. - Register, - // No-value. EndOfStatement, Colon, @@ -104,13 +101,6 @@ public: assert(Kind == Integer && "This token isn't an integer!"); return IntVal; } - - /// getRegVal - Get the register number for the current token, which should - /// be a register. - unsigned getRegVal() const { - assert(Kind == Register && "This token isn't a register!"); - return static_cast(IntVal); - } }; /// MCAsmLexer - Generic assembler lexer interface, for use by target specific diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index a71d3c321741..d7e3902ac478 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -7,14 +7,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCASMPARSER_H -#define LLVM_MC_MCASMPARSER_H +#ifndef LLVM_MC_MCPARSER_MCASMPARSER_H +#define LLVM_MC_MCPARSER_MCASMPARSER_H -#include "llvm/Support/DataTypes.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/Support/DataTypes.h" namespace llvm { -class AsmToken; class MCAsmInfo; class MCAsmLexer; class MCAsmParserExtension; @@ -22,13 +23,11 @@ class MCContext; class MCExpr; class MCInstPrinter; class MCInstrInfo; -class MCParsedAsmOperand; class MCStreamer; class MCTargetAsmParser; class SMLoc; class SMRange; class SourceMgr; -class StringRef; class Twine; /// MCAsmParserSemaCallback - Generic Sema callback for assembly parser. 
@@ -36,16 +35,21 @@ class MCAsmParserSemaCallback { public: virtual ~MCAsmParserSemaCallback(); virtual void *LookupInlineAsmIdentifier(StringRef Name, void *Loc, - unsigned &Size) = 0; + unsigned &Length, unsigned &Size, + unsigned &Type, bool &IsVarDecl) = 0; + virtual bool LookupInlineAsmField(StringRef Base, StringRef Member, unsigned &Offset) = 0; }; + /// MCAsmParser - Generic assembler parser interface, for use by target specific /// assembly parsers. class MCAsmParser { public: typedef bool (*DirectiveHandler)(MCAsmParserExtension*, StringRef, SMLoc); + typedef std::pair + ExtensionDirectiveHandler; private: MCAsmParser(const MCAsmParser &) LLVM_DELETED_FUNCTION; @@ -61,9 +65,8 @@ protected: // Can only create subclasses. public: virtual ~MCAsmParser(); - virtual void AddDirectiveHandler(MCAsmParserExtension *Object, - StringRef Directive, - DirectiveHandler Handler) = 0; + virtual void addDirectiveHandler(StringRef Directive, + ExtensionDirectiveHandler Handler) = 0; virtual SourceMgr &getSourceManager() = 0; @@ -89,8 +92,8 @@ public: virtual void setParsingInlineAsm(bool V) = 0; virtual bool isParsingInlineAsm() = 0; - /// ParseMSInlineAsm - Parse ms-style inline assembly. - virtual bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, + /// parseMSInlineAsm - Parse ms-style inline assembly. + virtual bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl > &OpDecls, SmallVectorImpl &Constraints, @@ -123,42 +126,50 @@ public: bool TokError(const Twine &Msg, ArrayRef Ranges = ArrayRef()); - /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// parseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. 
- virtual bool ParseIdentifier(StringRef &Res) = 0; + virtual bool parseIdentifier(StringRef &Res) = 0; /// \brief Parse up to the end of statement and return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. - virtual StringRef ParseStringToEndOfStatement() = 0; + virtual StringRef parseStringToEndOfStatement() = 0; - /// EatToEndOfStatement - Skip to the end of the current statement, for error + /// parseEscapedString - Parse the current token as a string which may include + /// escaped characters and return the string contents. + virtual bool parseEscapedString(std::string &Data) = 0; + + /// eatToEndOfStatement - Skip to the end of the current statement, for error /// recovery. - virtual void EatToEndOfStatement() = 0; + virtual void eatToEndOfStatement() = 0; - /// ParseExpression - Parse an arbitrary expression. + /// parseExpression - Parse an arbitrary expression. /// /// @param Res - The value of the expression. The result is undefined /// on error. /// @result - False on success. - virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0; - bool ParseExpression(const MCExpr *&Res); + virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0; + bool parseExpression(const MCExpr *&Res); - /// ParseParenExpression - Parse an arbitrary expression, assuming that an + /// parseParenExpression - Parse an arbitrary expression, assuming that an /// initial '(' has already been consumed. /// /// @param Res - The value of the expression. The result is undefined /// on error. /// @result - False on success. - virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0; + virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0; - /// ParseAbsoluteExpression - Parse an expression which must evaluate to an + /// parseAbsoluteExpression - Parse an expression which must evaluate to an /// absolute value. 
/// /// @param Res - The value of the absolute expression. The result is undefined /// on error. /// @result - False on success. - virtual bool ParseAbsoluteExpression(int64_t &Res) = 0; + virtual bool parseAbsoluteExpression(int64_t &Res) = 0; + + /// checkForValidSection - Ensure that we have a valid section set in the + /// streamer. Otherwise, report an error and switch to .text. + virtual void checkForValidSection() = 0; }; /// \brief Create an MCAsmParser instance. diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h index 0918c93bdf3d..2eda3a9a2143 100644 --- a/include/llvm/MC/MCParser/MCAsmParserExtension.h +++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCASMPARSEREXTENSION_H -#define LLVM_MC_MCASMPARSEREXTENSION_H +#ifndef LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H +#define LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H -#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/Support/SMLoc.h" namespace llvm { diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h index 60e7887a5396..4650bf21be7e 100644 --- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h +++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCASMOPERAND_H -#define LLVM_MC_MCASMOPERAND_H +#ifndef LLVM_MC_MCPARSER_MCPARSEDASMOPERAND_H +#define LLVM_MC_MCPARSER_MCPARSEDASMOPERAND_H namespace llvm { class SMLoc; @@ -57,18 +57,15 @@ public: /// isMem - Is this a memory operand? virtual bool isMem() const = 0; - virtual unsigned getMemSize() const { return 0; } /// getStartLoc - Get the location of the first token of this operand. 
virtual SMLoc getStartLoc() const = 0; /// getEndLoc - Get the location of the last token of this operand. virtual SMLoc getEndLoc() const = 0; - /// needAsmRewrite - AsmRewrites happen in both the target-independent and - /// target-dependent parsers. The target-independent parser calls this - /// function to determine if the target-dependent parser has already taken - /// care of the rewrites. Only valid when parsing MS-style inline assembly. - virtual bool needAsmRewrite() const { return true; } + /// needAddressOf - Do we need to emit code to get the address of the + /// variable/label? Only valid when parsing MS-style inline assembly. + virtual bool needAddressOf() const { return false; } /// isOffsetOf - Do we need to emit code to get the offset of the variable, /// rather then the value of the variable? Only valid when parsing MS-style @@ -78,10 +75,6 @@ public: /// getOffsetOfLoc - Get the location of the offset operator. virtual SMLoc getOffsetOfLoc() const { return SMLoc(); } - /// needSizeDirective - Do we need to emit a sizing directive for this - /// operand? Only valid when parsing MS-style inline assembly. - virtual bool needSizeDirective() const { return false; } - /// print - Print a debug representation of the operand to the given stream. virtual void print(raw_ostream &OS) const = 0; /// dump - Print to the debug stream. diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index f05baeaaf689..f5b4dddc5198 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -22,11 +22,15 @@ namespace llvm { +/// An unsigned integer type large enough to represent all physical registers, +/// but not necessarily virtual registers. +typedef uint16_t MCPhysReg; + /// MCRegisterClass - Base class of TargetRegisterClass. 
class MCRegisterClass { public: - typedef const uint16_t* iterator; - typedef const uint16_t* const_iterator; + typedef const MCPhysReg* iterator; + typedef const MCPhysReg* const_iterator; const char *Name; const iterator RegsBegin; @@ -148,11 +152,12 @@ private: const MCRegisterDesc *Desc; // Pointer to the descriptor array unsigned NumRegs; // Number of entries in the array unsigned RAReg; // Return address register + unsigned PCReg; // Program counter register const MCRegisterClass *Classes; // Pointer to the regclass array unsigned NumClasses; // Number of entries in the array unsigned NumRegUnits; // Number of regunits. const uint16_t (*RegUnitRoots)[2]; // Pointer to regunit root table. - const uint16_t *DiffLists; // Pointer to the difflists array + const MCPhysReg *DiffLists; // Pointer to the difflists array const char *RegStrings; // Pointer to the string table. const uint16_t *SubRegIndices; // Pointer to the subreg lookup // array. @@ -177,7 +182,7 @@ public: /// defined below. class DiffListIterator { uint16_t Val; - const uint16_t *List; + const MCPhysReg *List; protected: /// Create an invalid iterator. Call init() to point to something useful. @@ -186,7 +191,7 @@ public: /// init - Point the iterator to InitVal, decoding subsequent values from /// DiffList. The iterator will initially point to InitVal, sub-classes are /// responsible for skipping the seed value if it is not part of the list. - void init(uint16_t InitVal, const uint16_t *DiffList) { + void init(MCPhysReg InitVal, const MCPhysReg *DiffList) { Val = InitVal; List = DiffList; } @@ -196,7 +201,7 @@ public: /// is the caller's responsibility (by checking for a 0 return value). 
unsigned advance() { assert(isValid() && "Cannot move off the end of the list."); - uint16_t D = *List++; + MCPhysReg D = *List++; Val += D; return D; } @@ -225,13 +230,14 @@ public: friend class MCRegUnitIterator; friend class MCRegUnitRootIterator; - /// InitMCRegisterInfo - Initialize MCRegisterInfo, called by TableGen + /// \brief Initialize MCRegisterInfo, called by TableGen /// auto-generated routines. *DO NOT USE*. void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA, + unsigned PC, const MCRegisterClass *C, unsigned NC, const uint16_t (*RURoots)[2], unsigned NRU, - const uint16_t *DL, + const MCPhysReg *DL, const char *Strings, const uint16_t *SubIndices, unsigned NumIndices, @@ -239,6 +245,7 @@ public: Desc = D; NumRegs = NR; RAReg = RA; + PCReg = PC; Classes = C; DiffLists = DL; RegStrings = Strings; @@ -250,7 +257,7 @@ public: RegEncodingTable = RET; } - /// mapLLVMRegsToDwarfRegs - Used to initialize LLVM register to Dwarf + /// \brief Used to initialize LLVM register to Dwarf /// register number mapping. Called by TableGen auto-generated routines. /// *DO NOT USE*. void mapLLVMRegsToDwarfRegs(const DwarfLLVMRegPair *Map, unsigned Size, @@ -264,7 +271,7 @@ public: } } - /// mapDwarfRegsToLLVMRegs - Used to initialize Dwarf register to LLVM + /// \brief Used to initialize Dwarf register to LLVM /// register number mapping. Called by TableGen auto-generated routines. /// *DO NOT USE*. void mapDwarfRegsToLLVMRegs(const DwarfLLVMRegPair *Map, unsigned Size, @@ -287,77 +294,80 @@ public: L2SEHRegs[LLVMReg] = SEHReg; } - /// getRARegister - This method should return the register where the return + /// \brief This method should return the register where the return /// address can be found. unsigned getRARegister() const { return RAReg; } + /// Return the register which is the program counter. 
+ unsigned getProgramCounter() const { + return PCReg; + } + const MCRegisterDesc &operator[](unsigned RegNo) const { assert(RegNo < NumRegs && "Attempting to access record for invalid register number!"); return Desc[RegNo]; } - /// Provide a get method, equivalent to [], but more useful if we have a + /// \brief Provide a get method, equivalent to [], but more useful with a /// pointer to this object. - /// const MCRegisterDesc &get(unsigned RegNo) const { return operator[](RegNo); } - /// getSubReg - Returns the physical register number of sub-register "Index" + /// \brief Returns the physical register number of sub-register "Index" /// for physical register RegNo. Return zero if the sub-register does not /// exist. unsigned getSubReg(unsigned Reg, unsigned Idx) const; - /// getMatchingSuperReg - Return a super-register of the specified register + /// \brief Return a super-register of the specified register /// Reg so its sub-register of index SubIdx is Reg. unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const MCRegisterClass *RC) const; - /// getSubRegIndex - For a given register pair, return the sub-register index + /// \brief For a given register pair, return the sub-register index /// if the second register is a sub-register of the first. Return zero /// otherwise. unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const; - /// getName - Return the human-readable symbolic target-specific name for the + /// \brief Return the human-readable symbolic target-specific name for the /// specified physical register. 
const char *getName(unsigned RegNo) const { return RegStrings + get(RegNo).Name; } - /// getNumRegs - Return the number of registers this target has (useful for + /// \brief Return the number of registers this target has (useful for /// sizing arrays holding per register information) unsigned getNumRegs() const { return NumRegs; } - /// getNumSubRegIndices - Return the number of sub-register indices + /// \brief Return the number of sub-register indices /// understood by the target. Index 0 is reserved for the no-op sub-register, /// while 1 to getNumSubRegIndices() - 1 represent real sub-registers. unsigned getNumSubRegIndices() const { return NumSubRegIndices; } - /// getNumRegUnits - Return the number of (native) register units in the + /// \brief Return the number of (native) register units in the /// target. Register units are numbered from 0 to getNumRegUnits() - 1. They /// can be accessed through MCRegUnitIterator defined below. unsigned getNumRegUnits() const { return NumRegUnits; } - /// getDwarfRegNum - Map a target register to an equivalent dwarf register + /// \brief Map a target register to an equivalent dwarf register /// number. Returns -1 if there is no equivalent value. The second /// parameter allows targets to use different numberings for EH info and /// debugging info. int getDwarfRegNum(unsigned RegNum, bool isEH) const; - /// getLLVMRegNum - Map a dwarf register back to a target register. - /// + /// \brief Map a dwarf register back to a target register. int getLLVMRegNum(unsigned RegNum, bool isEH) const; - /// getSEHRegNum - Map a target register to an equivalent SEH register + /// \brief Map a target register to an equivalent SEH register /// number. Returns LLVM register number if there is no equivalent value. 
int getSEHRegNum(unsigned RegNum) const; @@ -368,20 +378,39 @@ public: return (unsigned)(regclass_end()-regclass_begin()); } - /// getRegClass - Returns the register class associated with the enumeration + /// \brief Returns the register class associated with the enumeration /// value. See class MCOperandInfo. const MCRegisterClass& getRegClass(unsigned i) const { assert(i < getNumRegClasses() && "Register Class ID out of range"); return Classes[i]; } - /// getEncodingValue - Returns the encoding for RegNo + /// \brief Returns the encoding for RegNo uint16_t getEncodingValue(unsigned RegNo) const { assert(RegNo < NumRegs && "Attempting to get encoding for invalid register number!"); return RegEncodingTable[RegNo]; } + /// \brief Returns true if RegB is a sub-register of RegA. + bool isSubRegister(unsigned RegA, unsigned RegB) const { + return isSuperRegister(RegB, RegA); + } + + /// \brief Returns true if RegB is a super-register of RegA. + bool isSuperRegister(unsigned RegA, unsigned RegB) const; + + /// \brief Returns true if RegB is a sub-register of RegA or if RegB == RegA. + bool isSubRegisterEq(unsigned RegA, unsigned RegB) const { + return isSuperRegisterEq(RegB, RegA); + } + + /// \brief Returns true if RegB is a super-register of RegA or if + /// RegB == RegA. + bool isSuperRegisterEq(unsigned RegA, unsigned RegB) const { + return RegA == RegB || isSuperRegister(RegA, RegB); + } + }; //===----------------------------------------------------------------------===// @@ -422,6 +451,15 @@ public: } }; +// Definition for isSuperRegister. Put it down here since it needs the +// iterator defined above in addition to the MCRegisterInfo class itself. 
+inline bool MCRegisterInfo::isSuperRegister(unsigned RegA, unsigned RegB) const{ + for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I) + if (*I == RegB) + return true; + return false; +} + //===----------------------------------------------------------------------===// // Register Units //===----------------------------------------------------------------------===// @@ -441,6 +479,7 @@ public: /// MCRegUnitIterator - Create an iterator that traverses the register units /// in Reg. MCRegUnitIterator(unsigned Reg, const MCRegisterInfo *MCRI) { + assert(Reg && "Null register has no regunits"); // Decode the RegUnits MCRegisterDesc field. unsigned RU = MCRI->get(Reg).RegUnits; unsigned Scale = RU & 15; @@ -480,17 +519,17 @@ public: Reg1 = MCRI->RegUnitRoots[RegUnit][1]; } - /// Dereference to get the current root register. + /// \brief Dereference to get the current root register. unsigned operator*() const { return Reg0; } - /// isValid - Check if the iterator is at the end of the list. + /// \brief Check if the iterator is at the end of the list. bool isValid() const { return Reg0; } - /// Preincrement to move to the next root register. + /// \brief Preincrement to move to the next root register. void operator++() { assert(isValid() && "Cannot move off the end of the list."); Reg0 = Reg1; diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h index 0c71ee513500..defa29903543 100644 --- a/include/llvm/MC/MCSchedule.h +++ b/include/llvm/MC/MCSchedule.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCSCHEDMODEL_H -#define LLVM_MC_MCSCHEDMODEL_H +#ifndef LLVM_MC_MCSCHEDULE_H +#define LLVM_MC_MCSCHEDULE_H #include "llvm/Support/DataTypes.h" #include @@ -155,7 +155,7 @@ public: // Optional InstrItinerary OperandCycles provides expected latency. // TODO: can't yet specify both min and expected latency per operand. 
int MinLatency; - static const unsigned DefaultMinLatency = -1; + static const int DefaultMinLatency = -1; // LoadLatency is the expected latency of load instructions. // @@ -172,6 +172,16 @@ public: unsigned HighLatency; static const unsigned DefaultHighLatency = 10; + // ILPWindow is the number of cycles that the scheduler effectively ignores + // before attempting to hide latency. This should be zero for in-order cpus to + // always hide expected latency. For out-of-order cpus, it may be tweaked as + // desired to roughly approximate instruction buffers. The actual threshold is + // not very important for an OOO processor, as long as it isn't too high. A + // nonzero value helps avoid rescheduling to hide latency when its is fairly + // obviously useless and makes register pressure heuristics more effective. + unsigned ILPWindow; + static const unsigned DefaultILPWindow = 0; + // MispredictPenalty is the typical number of extra cycles the processor // takes to recover from a branch misprediction. unsigned MispredictPenalty; @@ -196,6 +206,7 @@ public: MinLatency(DefaultMinLatency), LoadLatency(DefaultLoadLatency), HighLatency(DefaultHighLatency), + ILPWindow(DefaultILPWindow), MispredictPenalty(DefaultMispredictPenalty), ProcID(0), ProcResourceTable(0), SchedClassTable(0), NumProcResourceKinds(0), NumSchedClasses(0), @@ -205,12 +216,12 @@ public: } // Table-gen driven ctor. 
- MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned mp, - unsigned pi, const MCProcResourceDesc *pr, + MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned ilp, + unsigned mp, unsigned pi, const MCProcResourceDesc *pr, const MCSchedClassDesc *sc, unsigned npr, unsigned nsc, const InstrItinerary *ii): IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl), - MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr), + ILPWindow(ilp), MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr), SchedClassTable(sc), NumProcResourceKinds(npr), NumSchedClasses(nsc), InstrItineraries(ii) {} diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 21fdb6bd39b8..e5754249e91b 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -14,6 +14,7 @@ #ifndef LLVM_MC_MCSECTION_H #define LLVM_MC_MCSECTION_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Compiler.h" @@ -49,6 +50,11 @@ namespace llvm { virtual void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS) const = 0; + // Convenience routines to get label names for the beginning/end of a + // section. + virtual std::string getLabelBeginName() const = 0; + virtual std::string getLabelEndName() const = 0; + /// isBaseAddressKnownZero - Return true if we know that this section will /// get a base address of zero. 
In cases where we know that this is true we /// can emit section offsets as direct references to avoid a subtraction diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h index b050c0f442b6..07c47144cbdc 100644 --- a/include/llvm/MC/MCSectionCOFF.h +++ b/include/llvm/MC/MCSectionCOFF.h @@ -14,9 +14,9 @@ #ifndef LLVM_MC_MCSECTIONCOFF_H #define LLVM_MC_MCSECTIONCOFF_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCSection.h" #include "llvm/Support/COFF.h" -#include "llvm/ADT/StringRef.h" namespace llvm { @@ -50,6 +50,12 @@ namespace llvm { bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; StringRef getSectionName() const { return SectionName; } + virtual std::string getLabelBeginName() const { + return SectionName.str() + "_begin"; + } + virtual std::string getLabelEndName() const { + return SectionName.str() + "_end"; + } unsigned getCharacteristics() const { return Characteristics; } int getSelection () const { return Selection; } diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h index 4d54465760d4..4b8b849c79ed 100644 --- a/include/llvm/MC/MCSectionELF.h +++ b/include/llvm/MC/MCSectionELF.h @@ -14,9 +14,11 @@ #ifndef LLVM_MC_MCSECTIONELF_H #define LLVM_MC_MCSECTIONELF_H -#include "llvm/MC/MCSection.h" -#include "llvm/Support/ELF.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCSection.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -57,6 +59,11 @@ public: bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; StringRef getSectionName() const { return SectionName; } + virtual std::string getLabelBeginName() const { + return SectionName.str() + "_begin"; } + virtual std::string getLabelEndName() const { + return SectionName.str() + "_end"; + } unsigned getType() const { return Type; } unsigned getFlags() const { return Flags; } unsigned getEntrySize() const { return EntrySize; 
} diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h index 71ea8f3e901d..898f5714907f 100644 --- a/include/llvm/MC/MCSectionMachO.h +++ b/include/llvm/MC/MCSectionMachO.h @@ -14,8 +14,8 @@ #ifndef LLVM_MC_MCSECTIONMACHO_H #define LLVM_MC_MCSECTIONMACHO_H -#include "llvm/MC/MCSection.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCSection.h" namespace llvm { @@ -145,6 +145,14 @@ public: return StringRef(SectionName); } + virtual std::string getLabelBeginName() const { + return StringRef(getSegmentName().str() + getSectionName().str() + "_begin"); + } + + virtual std::string getLabelEndName() const { + return StringRef(getSegmentName().str() + getSectionName().str() + "_end"); + } + unsigned getTypeAndAttributes() const { return TypeAndAttributes; } unsigned getStubSize() const { return Reserved2; } diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 230d27ef2ef0..a069a2b0cafa 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -14,12 +14,14 @@ #ifndef LLVM_MC_MCSTREAMER_H #define LLVM_MC_MCSTREAMER_H -#include "llvm/Support/DataTypes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCWin64EH.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataTypes.h" +#include namespace llvm { class MCAsmBackend; @@ -45,6 +47,23 @@ namespace llvm { /// a .s file, and implementations that write out .o files of various formats. /// class MCStreamer { + public: + enum StreamerKind { + SK_AsmStreamer, + SK_NullStreamer, + SK_RecordStreamer, + + // MCObjectStreamer subclasses. 
+ SK_ELFStreamer, + SK_ARMELFStreamer, + SK_MachOStreamer, + SK_PureStreamer, + SK_MipsELFStreamer, + SK_WinCOFFStreamer + }; + + private: + const StreamerKind Kind; MCContext &Context; MCStreamer(const MCStreamer&) LLVM_DELETED_FUNCTION; @@ -55,6 +74,7 @@ namespace llvm { std::vector FrameInfos; MCDwarfFrameInfo *getCurrentFrameInfo(); + MCSymbol *EmitCFICommon(); void EnsureValidFrame(); std::vector W64UnwindInfos; @@ -69,8 +89,10 @@ namespace llvm { SmallVector, 4> SectionStack; + bool AutoInitSections; + protected: - MCStreamer(MCContext &Ctx); + MCStreamer(StreamerKind Kind, MCContext &Ctx); const MCExpr *BuildSymbolDiff(MCContext &Context, const MCSymbol *A, const MCSymbol *B); @@ -89,6 +111,12 @@ namespace llvm { public: virtual ~MCStreamer(); + StreamerKind getKind() const { return Kind; } + + /// State management + /// + virtual void reset(); + MCContext &getContext() const { return Context; } unsigned getNumFrameInfos() { @@ -213,9 +241,23 @@ namespace llvm { SectionStack.back().first = Section; } + /// Initialize the streamer. + void InitStreamer() { + if (AutoInitSections) + InitSections(); + } + + /// Tell this MCStreamer to call InitSections upon initialization. + void setAutoInitSections(bool AutoInitSections) { + this->AutoInitSections = AutoInitSections; + } + /// InitSections - Create the default sections and set the initial one. virtual void InitSections() = 0; + /// InitToTextSection - Create a text section and switch the streamer to it. + virtual void InitToTextSection() = 0; + /// EmitLabel - Emit a label for @p Symbol into the current section. /// /// This corresponds to an assembler statement such as: @@ -226,12 +268,18 @@ namespace llvm { /// used in an assignment. virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitDebugLabel(MCSymbol *Symbol); + virtual void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol); /// EmitAssemblerFlag - Note in the output the specified @p Flag. 
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) = 0; + /// EmitLinkerOptions - Emit the given list @p Options of strings as linker + /// options into the output. + virtual void EmitLinkerOptions(ArrayRef Kind) {} + /// EmitDataRegion - Note in the output the specified region @p Kind. virtual void EmitDataRegion(MCDataRegionType Kind) {} @@ -239,6 +287,9 @@ namespace llvm { /// a Thumb mode function (ARM target only). virtual void EmitThumbFunc(MCSymbol *Func) = 0; + /// getOrCreateSymbolData - Get symbol data for given symbol. + virtual MCSymbolData &getOrCreateSymbolData(MCSymbol *Symbol); + /// EmitAssignment - Emit an assignment of @p Value to @p Symbol. /// /// This corresponds to an assembler statement such as: @@ -346,7 +397,7 @@ namespace llvm { /// /// This is used to implement assembler directives such as .byte, .ascii, /// etc. - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) = 0; + virtual void EmitBytes(StringRef Data, unsigned AddrSpace = 0) = 0; /// EmitValue - Emit the expression @p Value into the output as a native /// integer of the given @p Size bytes. @@ -380,8 +431,8 @@ namespace llvm { /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. - void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0, - unsigned Padding = 0); + void EmitULEB128IntValue(uint64_t Value, unsigned Padding = 0, + unsigned AddrSpace = 0); /// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. @@ -409,15 +460,14 @@ namespace llvm { /// EmitFill - Emit NumBytes bytes worth of the value specified by /// FillValue. This implements directives such as '.space'. virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, - unsigned AddrSpace); + unsigned AddrSpace = 0); /// EmitZeros - Emit NumBytes worth of zeros. This is a convenience /// function that just wraps EmitFill. 
- void EmitZeros(uint64_t NumBytes, unsigned AddrSpace) { + void EmitZeros(uint64_t NumBytes, unsigned AddrSpace = 0) { EmitFill(NumBytes, 0, AddrSpace); } - /// EmitValueToAlignment - Emit some number of copies of @p Value until /// the byte alignment @p ByteAlignment is reached. /// @@ -475,7 +525,7 @@ namespace llvm { /// file number. This implements the DWARF2 '.file 4 "foo.c"' assembler /// directive. virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename); + StringRef Filename, unsigned CUID = 0); /// EmitDwarfLocDirective - This implements the DWARF2 // '.loc fileno lineno ...' assembler directive. @@ -515,6 +565,8 @@ namespace llvm { virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment); virtual void EmitCFIEscape(StringRef Values); virtual void EmitCFISignalFrame(); + virtual void EmitCFIUndefined(int64_t Register); + virtual void EmitCFIRegister(int64_t Register1, int64_t Register2); virtual void EmitWin64EHStartProc(const MCSymbol *Symbol); virtual void EmitWin64EHEndProc(); @@ -535,6 +587,20 @@ namespace llvm { /// section. virtual void EmitInstruction(const MCInst &Inst) = 0; + /// \brief Set the bundle alignment mode from now on in the section. + /// The argument is the power of 2 to which the alignment is set. The + /// value 0 means turn the bundle alignment off. + virtual void EmitBundleAlignMode(unsigned AlignPow2) = 0; + + /// \brief The following instructions are a bundle-locked group. + /// + /// \param AlignToEnd - If true, the bundle-locked group will be aligned to + /// the end of a bundle. + virtual void EmitBundleLock(bool AlignToEnd) = 0; + + /// \brief Ends a bundle-locked group. + virtual void EmitBundleUnlock() = 0; + /// EmitRawText - If this file is backed by a assembly streamer, this dumps /// the specified string in the output .s file. This capability is /// indicated by the hasRawTextSupport() predicate. By default this aborts. 
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h index 69213cd77d92..346fb2df0ffc 100644 --- a/include/llvm/MC/MCSubtargetInfo.h +++ b/include/llvm/MC/MCSubtargetInfo.h @@ -14,8 +14,8 @@ #ifndef LLVM_MC_MCSUBTARGET_H #define LLVM_MC_MCSUBTARGET_H -#include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/SubtargetFeature.h" #include namespace llvm { diff --git a/include/llvm/MC/MCTargetAsmLexer.h b/include/llvm/MC/MCTargetAsmLexer.h deleted file mode 100644 index b1cc546e1efa..000000000000 --- a/include/llvm/MC/MCTargetAsmLexer.h +++ /dev/null @@ -1,89 +0,0 @@ -//===-- llvm/MC/MCTargetAsmLexer.h - Target Assembly Lexer ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MC_MCTARGETASMLEXER_H -#define LLVM_MC_MCTARGETASMLEXER_H - -#include "llvm/MC/MCParser/MCAsmLexer.h" - -namespace llvm { -class Target; - -/// MCTargetAsmLexer - Generic interface to target specific assembly lexers. -class MCTargetAsmLexer { - /// The current token - AsmToken CurTok; - - /// The location and description of the current error - SMLoc ErrLoc; - std::string Err; - - MCTargetAsmLexer(const MCTargetAsmLexer &) LLVM_DELETED_FUNCTION; - void operator=(const MCTargetAsmLexer &) LLVM_DELETED_FUNCTION; -protected: // Can only create subclasses. - MCTargetAsmLexer(const Target &); - - virtual AsmToken LexToken() = 0; - - void SetError(const SMLoc &errLoc, const std::string &err) { - ErrLoc = errLoc; - Err = err; - } - - /// TheTarget - The Target that this machine was created for. 
- const Target &TheTarget; - MCAsmLexer *Lexer; - -public: - virtual ~MCTargetAsmLexer(); - - const Target &getTarget() const { return TheTarget; } - - /// InstallLexer - Set the lexer to get tokens from lower-level lexer \p L. - void InstallLexer(MCAsmLexer &L) { - Lexer = &L; - } - - MCAsmLexer *getLexer() { - return Lexer; - } - - /// Lex - Consume the next token from the input stream and return it. - const AsmToken &Lex() { - return CurTok = LexToken(); - } - - /// getTok - Get the current (last) lexed token. - const AsmToken &getTok() { - return CurTok; - } - - /// getErrLoc - Get the current error location - const SMLoc &getErrLoc() { - return ErrLoc; - } - - /// getErr - Get the current error string - const std::string &getErr() { - return Err; - } - - /// getKind - Get the kind of current token. - AsmToken::TokenKind getKind() const { return CurTok.getKind(); } - - /// is - Check if the current token has kind \p K. - bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } - - /// isNot - Check if the current token has kind \p K. - bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } -}; - -} // End llvm namespace - -#endif diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h index 483a80b3b595..4c5b17612569 100644 --- a/include/llvm/MC/MCTargetAsmParser.h +++ b/include/llvm/MC/MCTargetAsmParser.h @@ -22,6 +22,7 @@ class MCInst; template class SmallVectorImpl; enum AsmRewriteKind { + AOK_Align, // Rewrite align as .align. AOK_DotOperator, // Rewrite a dot operator expression as an immediate. // E.g., [eax].foo.bar -> [eax].8 AOK_Emit, // Rewrite _emit as .byte. @@ -142,6 +143,15 @@ public: MCStreamer &Out, unsigned &ErrorInfo, bool MatchingInlineAsm) = 0; + /// Allow a target to add special case operand matching for things that + /// tblgen doesn't/can't handle effectively. For example, literal + /// immediates on ARM. 
TableGen expects a token operand, but the parser + /// will recognize them as immediates. + virtual unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned Kind) { + return Match_InvalidOperand; + } + /// checkTargetMatchPredicate - Validate the instruction match against /// any complex target predicates not expressible via match classes. virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h index f9af8bcfbf61..a4e730111484 100644 --- a/include/llvm/MC/MCValue.h +++ b/include/llvm/MC/MCValue.h @@ -14,8 +14,8 @@ #ifndef LLVM_MC_MCVALUE_H #define LLVM_MC_MCVALUE_H -#include "llvm/Support/DataTypes.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/DataTypes.h" #include namespace llvm { diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h index 7a0b1ffaf0a0..11df5749d450 100644 --- a/include/llvm/MC/MCWinCOFFObjectWriter.h +++ b/include/llvm/MC/MCWinCOFFObjectWriter.h @@ -11,6 +11,9 @@ #define LLVM_MC_MCWINCOFFOBJECTWRITER_H namespace llvm { + class MCObjectWriter; + class raw_ostream; + class MCWinCOFFObjectTargetWriter { const unsigned Machine; diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h index 57f0518cbf3a..37ae03b45ca2 100644 --- a/include/llvm/MC/SubtargetFeature.h +++ b/include/llvm/MC/SubtargetFeature.h @@ -18,9 +18,9 @@ #ifndef LLVM_MC_SUBTARGETFEATURE_H #define LLVM_MC_SUBTARGETFEATURE_H -#include #include "llvm/ADT/Triple.h" #include "llvm/Support/DataTypes.h" +#include namespace llvm { class raw_ostream; diff --git a/include/llvm/MDBuilder.h b/include/llvm/MDBuilder.h deleted file mode 100644 index 1867a639236e..000000000000 --- a/include/llvm/MDBuilder.h +++ /dev/null @@ -1,162 +0,0 @@ -//===---- llvm/MDBuilder.h - Builder for LLVM metadata ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the MDBuilder class, which is used as a convenient way to -// create LLVM metadata with a consistent and simplified interface. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MDBUILDER_H -#define LLVM_MDBUILDER_H - -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/ADT/APInt.h" - -namespace llvm { - - class MDBuilder { - LLVMContext &Context; - - public: - MDBuilder(LLVMContext &context) : Context(context) {} - - /// \brief Return the given string as metadata. - MDString *createString(StringRef Str) { - return MDString::get(Context, Str); - } - - //===------------------------------------------------------------------===// - // FPMath metadata. - //===------------------------------------------------------------------===// - - /// \brief Return metadata with the given settings. The special value 0.0 - /// for the Accuracy parameter indicates the default (maximal precision) - /// setting. - MDNode *createFPMath(float Accuracy) { - if (Accuracy == 0.0) - return 0; - assert(Accuracy > 0.0 && "Invalid fpmath accuracy!"); - Value *Op = ConstantFP::get(Type::getFloatTy(Context), Accuracy); - return MDNode::get(Context, Op); - } - - //===------------------------------------------------------------------===// - // Prof metadata. - //===------------------------------------------------------------------===// - - /// \brief Return metadata containing two branch weights. - MDNode *createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight) { - uint32_t Weights[] = { TrueWeight, FalseWeight }; - return createBranchWeights(Weights); - } - - /// \brief Return metadata containing a number of branch weights. 
- MDNode *createBranchWeights(ArrayRef Weights) { - assert(Weights.size() >= 2 && "Need at least two branch weights!"); - - SmallVector Vals(Weights.size()+1); - Vals[0] = createString("branch_weights"); - - Type *Int32Ty = Type::getInt32Ty(Context); - for (unsigned i = 0, e = Weights.size(); i != e; ++i) - Vals[i+1] = ConstantInt::get(Int32Ty, Weights[i]); - - return MDNode::get(Context, Vals); - } - - //===------------------------------------------------------------------===// - // Range metadata. - //===------------------------------------------------------------------===// - - /// \brief Return metadata describing the range [Lo, Hi). - MDNode *createRange(const APInt &Lo, const APInt &Hi) { - assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!"); - // If the range is everything then it is useless. - if (Hi == Lo) - return 0; - - // Return the range [Lo, Hi). - Type *Ty = IntegerType::get(Context, Lo.getBitWidth()); - Value *Range[2] = { ConstantInt::get(Ty, Lo), ConstantInt::get(Ty, Hi) }; - return MDNode::get(Context, Range); - } - - - //===------------------------------------------------------------------===// - // TBAA metadata. - //===------------------------------------------------------------------===// - - /// \brief Return metadata appropriate for a TBAA root node. Each returned - /// node is distinct from all other metadata and will never be identified - /// (uniqued) with anything else. - MDNode *createAnonymousTBAARoot() { - // To ensure uniqueness the root node is self-referential. - MDNode *Dummy = MDNode::getTemporary(Context, ArrayRef()); - MDNode *Root = MDNode::get(Context, Dummy); - // At this point we have - // !0 = metadata !{} <- dummy - // !1 = metadata !{metadata !0} <- root - // Replace the dummy operand with the root node itself and delete the dummy. 
- Root->replaceOperandWith(0, Root); - MDNode::deleteTemporary(Dummy); - // We now have - // !1 = metadata !{metadata !1} <- self-referential root - return Root; - } - - /// \brief Return metadata appropriate for a TBAA root node with the given - /// name. This may be identified (uniqued) with other roots with the same - /// name. - MDNode *createTBAARoot(StringRef Name) { - return MDNode::get(Context, createString(Name)); - } - - /// \brief Return metadata for a non-root TBAA node with the given name, - /// parent in the TBAA tree, and value for 'pointsToConstantMemory'. - MDNode *createTBAANode(StringRef Name, MDNode *Parent, - bool isConstant = false) { - if (isConstant) { - Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1); - Value *Ops[3] = { createString(Name), Parent, Flags }; - return MDNode::get(Context, Ops); - } else { - Value *Ops[2] = { createString(Name), Parent }; - return MDNode::get(Context, Ops); - } - } - - struct TBAAStructField { - uint64_t Offset; - uint64_t Size; - MDNode *TBAA; - TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *TBAA) : - Offset(Offset), Size(Size), TBAA(TBAA) {} - }; - - /// \brief Return metadata for a tbaa.struct node with the given - /// struct field descriptions. 
- MDNode *createTBAAStructNode(ArrayRef Fields) { - SmallVector Vals(Fields.size() * 3); - Type *Int64 = IntegerType::get(Context, 64); - for (unsigned i = 0, e = Fields.size(); i != e; ++i) { - Vals[i * 3 + 0] = ConstantInt::get(Int64, Fields[i].Offset); - Vals[i * 3 + 1] = ConstantInt::get(Int64, Fields[i].Size); - Vals[i * 3 + 2] = Fields[i].TBAA; - } - return MDNode::get(Context, Vals); - } - - }; - -} // end namespace llvm - -#endif diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index f3d824960c2f..e2478f6754b0 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -14,22 +14,78 @@ #ifndef LLVM_OBJECT_ARCHIVE_H #define LLVM_OBJECT_ARCHIVE_H -#include "llvm/Object/Binary.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/Binary.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" namespace llvm { namespace object { +struct ArchiveMemberHeader { + char Name[16]; + char LastModified[12]; + char UID[6]; + char GID[6]; + char AccessMode[8]; + char Size[10]; ///< Size of data, not including header or padding. + char Terminator[2]; + + ///! Get the name without looking up long names. + llvm::StringRef getName() const { + char EndCond; + if (Name[0] == '/' || Name[0] == '#') + EndCond = ' '; + else + EndCond = '/'; + llvm::StringRef::size_type end = + llvm::StringRef(Name, sizeof(Name)).find(EndCond); + if (end == llvm::StringRef::npos) + end = sizeof(Name); + assert(end <= sizeof(Name) && end > 0); + // Don't include the EndCond if there is one. 
+ return llvm::StringRef(Name, end); + } + + uint64_t getSize() const { + uint64_t ret; + if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, ret)) + llvm_unreachable("Size is not an integer."); + return ret; + } +}; + +static const ArchiveMemberHeader *ToHeader(const char *base) { + return reinterpret_cast(base); +} class Archive : public Binary { virtual void anchor(); public: class Child { const Archive *Parent; + /// \brief Includes header but not padding byte. StringRef Data; + /// \brief Offset from Data to the start of the file. + uint16_t StartOfFile; public: - Child(const Archive *p, StringRef d) : Parent(p), Data(d) {} + Child(const Archive *p, StringRef d) : Parent(p), Data(d) { + if (!p || d.empty()) + return; + // Setup StartOfFile and PaddingBytes. + StartOfFile = sizeof(ArchiveMemberHeader); + // Don't include attached name. + StringRef Name = ToHeader(Data.data())->getName(); + if (Name.startswith("#1/")) { + uint64_t NameSize; + if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) + llvm_unreachable("Long name length is not an integer"); + StartOfFile += NameSize; + } + } bool operator ==(const Child &other) const { return (Parent == other.Parent) && (Data.begin() == other.Data.begin()); @@ -39,16 +95,48 @@ public: return Data.begin() < other.Data.begin(); } - Child getNext() const; + Child getNext() const { + size_t SpaceToSkip = Data.size(); + // If it's odd, add 1 to make it even. + if (SpaceToSkip & 1) + ++SpaceToSkip; + + const char *NextLoc = Data.data() + SpaceToSkip; + + // Check to see if this is past the end of the archive. + if (NextLoc >= Parent->Data->getBufferEnd()) + return Child(Parent, StringRef(0, 0)); + + size_t NextSize = + sizeof(ArchiveMemberHeader) + ToHeader(NextLoc)->getSize(); + + return Child(Parent, StringRef(NextLoc, NextSize)); + } + error_code getName(StringRef &Result) const; int getLastModified() const; int getUID() const; int getGID() const; int getAccessMode() const; - ///! 
Return the size of the archive member without the header or padding. - uint64_t getSize() const; + /// \return the size of the archive member without the header or padding. + uint64_t getSize() const { return Data.size() - StartOfFile; } + + StringRef getBuffer() const { + return StringRef(Data.data() + StartOfFile, getSize()); + } + + error_code getMemoryBuffer(OwningPtr &Result, + bool FullPath = false) const { + StringRef Name; + if (error_code ec = getName(Name)) + return ec; + SmallString<128> Path; + Result.reset(MemoryBuffer::getMemBuffer( + getBuffer(), FullPath ? (Twine(Parent->getFileName()) + "(" + Name + + ")").toStringRef(Path) : Name, false)); + return error_code::success(); + } - MemoryBuffer *getBuffer() const; error_code getAsBinary(OwningPtr &Result) const; }; @@ -122,6 +210,16 @@ public: Archive(MemoryBuffer *source, error_code &ec); + enum Kind { + K_GNU, + K_BSD, + K_COFF + }; + + Kind kind() const { + return Format; + } + child_iterator begin_children(bool skip_internal = true) const; child_iterator end_children() const; @@ -133,9 +231,13 @@ public: return v->isArchive(); } + // check if a symbol is in the archive + child_iterator findSym(StringRef name) const; + private: child_iterator SymbolTable; child_iterator StringTable; + Kind Format; }; } diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h index d555de3accc2..8bbcd8b4d4c6 100644 --- a/include/llvm/Object/Binary.h +++ b/include/llvm/Object/Binary.h @@ -49,8 +49,8 @@ protected: ID_EndObjects }; - static inline unsigned int getELFType(bool isLittleEndian, bool is64Bits) { - if (isLittleEndian) + static inline unsigned int getELFType(bool isLE, bool is64Bits) { + if (isLE) return is64Bits ? ID_ELF64L : ID_ELF32L; else return is64Bits ? 
ID_ELF64B : ID_ELF32B; @@ -85,6 +85,10 @@ public: bool isCOFF() const { return TypeID == ID_COFF; } + + bool isLittleEndian() const { + return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B); + } }; /// @brief Create a Binary from Source, autodetecting the file type. diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 466de93a78b2..8ea5e46e09e8 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -14,11 +14,11 @@ #ifndef LLVM_OBJECT_ELF_H #define LLVM_OBJECT_ELF_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" @@ -33,6 +33,21 @@ namespace llvm { namespace object { +using support::endianness; + +template +struct ELFType { + static const endianness TargetEndianness = target_endianness; + static const std::size_t MaxAlignment = max_alignment; + static const bool Is64Bits = is64Bits; +}; + +template +struct MaximumAlignment { + enum {value = AlignOf::Alignment > max_align ? max_align + : AlignOf::Alignment}; +}; + // Subclasses of ELFObjectFile may need this for template instantiation inline std::pair getElfArchType(MemoryBuffer *Object) { @@ -43,69 +58,78 @@ getElfArchType(MemoryBuffer *Object) { } // Templates to choose Elf_Addr and Elf_Off depending on is64Bits. 
-template +template struct ELFDataTypeTypedefHelperCommon { typedef support::detail::packed_endian_specific_integral - Elf_Half; + ::value> Elf_Half; typedef support::detail::packed_endian_specific_integral - Elf_Word; + ::value> Elf_Word; typedef support::detail::packed_endian_specific_integral - Elf_Sword; + ::value> Elf_Sword; typedef support::detail::packed_endian_specific_integral - Elf_Xword; + ::value> Elf_Xword; typedef support::detail::packed_endian_specific_integral - Elf_Sxword; + ::value> Elf_Sxword; }; -template +template struct ELFDataTypeTypedefHelper; /// ELF 32bit types. -template -struct ELFDataTypeTypedefHelper - : ELFDataTypeTypedefHelperCommon { +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct ELFDataTypeTypedefHelper > + : ELFDataTypeTypedefHelperCommon { typedef uint32_t value_type; typedef support::detail::packed_endian_specific_integral - Elf_Addr; + ::value> Elf_Addr; typedef support::detail::packed_endian_specific_integral - Elf_Off; + ::value> Elf_Off; }; /// ELF 64bit types. -template -struct ELFDataTypeTypedefHelper - : ELFDataTypeTypedefHelperCommon{ +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct ELFDataTypeTypedefHelper > + : ELFDataTypeTypedefHelperCommon { typedef uint64_t value_type; typedef support::detail::packed_endian_specific_integral - Elf_Addr; + ::value> Elf_Addr; typedef support::detail::packed_endian_specific_integral - Elf_Off; + ::value> Elf_Off; }; // I really don't like doing this, but the alternative is copypasta. 
-#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \ -typedef typename \ - ELFDataTypeTypedefHelper::Elf_Addr Elf_Addr; \ -typedef typename \ - ELFDataTypeTypedefHelper::Elf_Off Elf_Off; \ -typedef typename \ - ELFDataTypeTypedefHelper::Elf_Half Elf_Half; \ -typedef typename \ - ELFDataTypeTypedefHelper::Elf_Word Elf_Word; \ -typedef typename \ - ELFDataTypeTypedefHelper::Elf_Sword Elf_Sword; \ -typedef typename \ - ELFDataTypeTypedefHelper::Elf_Xword Elf_Xword; \ -typedef typename \ - ELFDataTypeTypedefHelper::Elf_Sxword Elf_Sxword; +#define LLVM_ELF_IMPORT_TYPES(ELFT) \ +typedef typename ELFDataTypeTypedefHelper ::Elf_Addr Elf_Addr; \ +typedef typename ELFDataTypeTypedefHelper ::Elf_Off Elf_Off; \ +typedef typename ELFDataTypeTypedefHelper ::Elf_Half Elf_Half; \ +typedef typename ELFDataTypeTypedefHelper ::Elf_Word Elf_Word; \ +typedef typename ELFDataTypeTypedefHelper ::Elf_Sword Elf_Sword; \ +typedef typename ELFDataTypeTypedefHelper ::Elf_Xword Elf_Xword; \ +typedef typename ELFDataTypeTypedefHelper ::Elf_Sxword Elf_Sxword; + +// This is required to get template types into a macro :( +#define LLVM_ELF_COMMA , // Section header. 
-template +template struct Elf_Shdr_Base; -template -struct Elf_Shdr_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, false) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Shdr_Base > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word sh_name; // Section name (index into string table) Elf_Word sh_type; // Section type (SHT_*) Elf_Word sh_flags; // Section flags (SHF_*) @@ -118,9 +142,11 @@ struct Elf_Shdr_Base { Elf_Word sh_entsize; // Size of records contained within the section }; -template -struct Elf_Shdr_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, true) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Shdr_Base > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word sh_name; // Section name (index into string table) Elf_Word sh_type; // Section type (SHT_*) Elf_Xword sh_flags; // Section flags (SHF_*) @@ -133,10 +159,10 @@ struct Elf_Shdr_Base { Elf_Xword sh_entsize; // Size of records contained within the section }; -template -struct Elf_Shdr_Impl : Elf_Shdr_Base { - using Elf_Shdr_Base::sh_entsize; - using Elf_Shdr_Base::sh_size; +template +struct Elf_Shdr_Impl : Elf_Shdr_Base { + using Elf_Shdr_Base::sh_entsize; + using Elf_Shdr_Base::sh_size; /// @brief Get the number of entities this section contains if it has any. 
unsigned getEntityCount() const { @@ -146,12 +172,14 @@ struct Elf_Shdr_Impl : Elf_Shdr_Base { } }; -template +template struct Elf_Sym_Base; -template -struct Elf_Sym_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, false) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Sym_Base > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word st_name; // Symbol name (index into string table) Elf_Addr st_value; // Value or address associated with the symbol Elf_Word st_size; // Size of the symbol @@ -160,9 +188,11 @@ struct Elf_Sym_Base { Elf_Half st_shndx; // Which section (header table index) it's defined in }; -template -struct Elf_Sym_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, true) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Sym_Base > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word st_name; // Symbol name (index into string table) unsigned char st_info; // Symbol's type and binding attributes unsigned char st_other; // Must be zero; reserved @@ -171,9 +201,9 @@ struct Elf_Sym_Base { Elf_Xword st_size; // Size of the symbol }; -template -struct Elf_Sym_Impl : Elf_Sym_Base { - using Elf_Sym_Base::st_info; +template +struct Elf_Sym_Impl : Elf_Sym_Base { + using Elf_Sym_Base::st_info; // These accessors and mutators correspond to the ELF32_ST_BIND, // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification: @@ -188,21 +218,21 @@ struct Elf_Sym_Impl : Elf_Sym_Base { /// Elf_Versym: This is the structure of entries in the SHT_GNU_versym section /// (.gnu.version). This structure is identical for ELF32 and ELF64. -template +template struct Elf_Versym_Impl { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Half vs_index; // Version index with flags (e.g. VERSYM_HIDDEN) }; -template +template struct Elf_Verdaux_Impl; /// Elf_Verdef: This is the structure of entries in the SHT_GNU_verdef section /// (.gnu.version_d). 
This structure is identical for ELF32 and ELF64. -template +template struct Elf_Verdef_Impl { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) - typedef Elf_Verdaux_Impl Elf_Verdaux; + LLVM_ELF_IMPORT_TYPES(ELFT) + typedef Elf_Verdaux_Impl Elf_Verdaux; Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT) Elf_Half vd_flags; // Bitwise flags (VER_DEF_*) Elf_Half vd_ndx; // Version index, used in .gnu.version entries @@ -219,18 +249,18 @@ struct Elf_Verdef_Impl { /// Elf_Verdaux: This is the structure of auxiliary data in the SHT_GNU_verdef /// section (.gnu.version_d). This structure is identical for ELF32 and ELF64. -template +template struct Elf_Verdaux_Impl { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word vda_name; // Version name (offset in string table) Elf_Word vda_next; // Offset to next Verdaux entry (in bytes) }; /// Elf_Verneed: This is the structure of entries in the SHT_GNU_verneed /// section (.gnu.version_r). This structure is identical for ELF32 and ELF64. -template +template struct Elf_Verneed_Impl { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Half vn_version; // Version of this structure (e.g. VER_NEED_CURRENT) Elf_Half vn_cnt; // Number of associated Vernaux entries Elf_Word vn_file; // Library name (string table offset) @@ -240,9 +270,9 @@ struct Elf_Verneed_Impl { /// Elf_Vernaux: This is the structure of auxiliary data in SHT_GNU_verneed /// section (.gnu.version_r). This structure is identical for ELF32 and ELF64. 
-template +template struct Elf_Vernaux_Impl { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word vna_hash; // Hash of dependency name Elf_Half vna_flags; // Bitwise Flags (VER_FLAG_*) Elf_Half vna_other; // Version index, used in .gnu.version entries @@ -252,12 +282,14 @@ struct Elf_Vernaux_Impl { /// Elf_Dyn_Base: This structure matches the form of entries in the dynamic /// table section (.dynamic) look like. -template +template struct Elf_Dyn_Base; -template -struct Elf_Dyn_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, false) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Dyn_Base > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Sword d_tag; union { Elf_Word d_val; @@ -265,9 +297,11 @@ struct Elf_Dyn_Base { } d_un; }; -template -struct Elf_Dyn_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, true) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Dyn_Base > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Sxword d_tag; union { Elf_Xword d_val; @@ -276,120 +310,154 @@ struct Elf_Dyn_Base { }; /// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters. -template -struct Elf_Dyn_Impl : Elf_Dyn_Base { - using Elf_Dyn_Base::d_tag; - using Elf_Dyn_Base::d_un; +template +struct Elf_Dyn_Impl : Elf_Dyn_Base { + using Elf_Dyn_Base::d_tag; + using Elf_Dyn_Base::d_un; int64_t getTag() const { return d_tag; } uint64_t getVal() const { return d_un.d_val; } uint64_t getPtr() const { return d_un.ptr; } }; -template -class ELFObjectFile; - -// DynRefImpl: Reference to an entry in the dynamic table -// This is an ELF-specific interface. 
-template -class DynRefImpl { - typedef Elf_Dyn_Impl Elf_Dyn; - typedef ELFObjectFile OwningType; - - DataRefImpl DynPimpl; - const OwningType *OwningObject; - -public: - DynRefImpl() : OwningObject(NULL) { } - - DynRefImpl(DataRefImpl DynP, const OwningType *Owner); - - bool operator==(const DynRefImpl &Other) const; - bool operator <(const DynRefImpl &Other) const; - - error_code getNext(DynRefImpl &Result) const; - int64_t getTag() const; - uint64_t getVal() const; - uint64_t getPtr() const; - - DataRefImpl getRawDataRefImpl() const; -}; - // Elf_Rel: Elf Relocation -template +template struct Elf_Rel_Base; -template -struct Elf_Rel_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, false) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Rel_Base, false> { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) Elf_Word r_info; // Symbol table index and type of relocation to apply + + uint32_t getRInfo(bool isMips64EL) const { + assert(!isMips64EL); + return r_info; + } + void setRInfo(uint32_t R) { + r_info = R; + } }; -template -struct Elf_Rel_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, true) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Rel_Base, false> { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) Elf_Xword r_info; // Symbol table index and type of relocation to apply + + uint64_t getRInfo(bool isMips64EL) const { + uint64_t t = r_info; + if (!isMips64EL) + return t; + // Mip64 little endian has a "special" encoding of r_info. Instead of one + // 64 bit little endian number, it is a little ending 32 bit number followed + // by a 32 bit big endian number. + return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) | + ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff); + return r_info; + } + void setRInfo(uint64_t R) { + // FIXME: Add mips64el support. 
+ r_info = R; + } }; -template -struct Elf_Rel_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, false) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Rel_Base, true> { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) Elf_Word r_info; // Symbol table index and type of relocation to apply Elf_Sword r_addend; // Compute value for relocatable field by adding this + + uint32_t getRInfo(bool isMips64EL) const { + assert(!isMips64EL); + return r_info; + } + void setRInfo(uint32_t R) { + r_info = R; + } }; -template -struct Elf_Rel_Base { - LLVM_ELF_IMPORT_TYPES(target_endianness, true) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Rel_Base, true> { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) Elf_Xword r_info; // Symbol table index and type of relocation to apply Elf_Sxword r_addend; // Compute value for relocatable field by adding this. + + uint64_t getRInfo(bool isMips64EL) const { + // Mip64 little endian has a "special" encoding of r_info. Instead of one + // 64 bit little endian number, it is a little ending 32 bit number followed + // by a 32 bit big endian number. + uint64_t t = r_info; + if (!isMips64EL) + return t; + return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) | + ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff); + } + void setRInfo(uint64_t R) { + // FIXME: Add mips64el support. 
+ r_info = R; + } }; -template +template struct Elf_Rel_Impl; -template -struct Elf_Rel_Impl - : Elf_Rel_Base { - using Elf_Rel_Base::r_info; - LLVM_ELF_IMPORT_TYPES(target_endianness, true) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign, bool isRela> +struct Elf_Rel_Impl, isRela> + : Elf_Rel_Base, isRela> { + LLVM_ELF_IMPORT_TYPES(ELFT) // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, // and ELF64_R_INFO macros defined in the ELF specification: - uint64_t getSymbol() const { return (r_info >> 32); } - unsigned char getType() const { - return (unsigned char) (r_info & 0xffffffffL); + uint32_t getSymbol(bool isMips64EL) const { + return (uint32_t) (this->getRInfo(isMips64EL) >> 32); } - void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); } - void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } - void setSymbolAndType(uint64_t s, unsigned char t) { - r_info = (s << 32) + (t&0xffffffffL); + uint32_t getType(bool isMips64EL) const { + return (uint32_t) (this->getRInfo(isMips64EL) & 0xffffffffL); + } + void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); } + void setType(uint32_t t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(uint32_t s, uint32_t t) { + this->setRInfo(((uint64_t)s << 32) + (t&0xffffffffL)); } }; -template -struct Elf_Rel_Impl - : Elf_Rel_Base { - using Elf_Rel_Base::r_info; - LLVM_ELF_IMPORT_TYPES(target_endianness, false) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign, bool isRela> +struct Elf_Rel_Impl, isRela> + : Elf_Rel_Base, isRela> { + LLVM_ELF_IMPORT_TYPES(ELFT) // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE, // and ELF32_R_INFO macros defined in the ELF specification: - uint32_t getSymbol() const { return (r_info >> 8); } - unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); } + uint32_t getSymbol(bool isMips64EL) const { + return this->getRInfo(isMips64EL) >> 8; + } 
+ unsigned char getType(bool isMips64EL) const { + return (unsigned char) (this->getRInfo(isMips64EL) & 0x0ff); + } void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); } void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } void setSymbolAndType(uint32_t s, unsigned char t) { - r_info = (s << 8) + t; + this->setRInfo((s << 8) + t); } }; -template +template struct Elf_Ehdr_Impl { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + LLVM_ELF_IMPORT_TYPES(ELFT) unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes Elf_Half e_type; // Type of file (see ET_*) Elf_Half e_machine; // Required architecture for this file (see EM_*) @@ -412,15 +480,17 @@ struct Elf_Ehdr_Impl { unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; } }; -template -struct Elf_Phdr; +template +struct Elf_Phdr_Impl; -template -struct Elf_Phdr { - LLVM_ELF_IMPORT_TYPES(target_endianness, false) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Phdr_Impl > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word p_type; // Type of segment Elf_Off p_offset; // FileOffset where segment is located, in bytes - Elf_Addr p_vaddr; // Virtual Address of beginning of segment + Elf_Addr p_vaddr; // Virtual Address of beginning of segment Elf_Addr p_paddr; // Physical address of beginning of segment (OS-specific) Elf_Word p_filesz; // Num. of bytes in file image of segment (may be zero) Elf_Word p_memsz; // Num. 
of bytes in mem image of segment (may be zero) @@ -428,36 +498,107 @@ struct Elf_Phdr { Elf_Word p_align; // Segment alignment constraint }; -template -struct Elf_Phdr { - LLVM_ELF_IMPORT_TYPES(target_endianness, true) +template class ELFT, + endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_Phdr_Impl > { + LLVM_ELF_IMPORT_TYPES(ELFT) Elf_Word p_type; // Type of segment Elf_Word p_flags; // Segment flags Elf_Off p_offset; // FileOffset where segment is located, in bytes - Elf_Addr p_vaddr; // Virtual Address of beginning of segment + Elf_Addr p_vaddr; // Virtual Address of beginning of segment Elf_Addr p_paddr; // Physical address of beginning of segment (OS-specific) - Elf_Word p_filesz; // Num. of bytes in file image of segment (may be zero) - Elf_Word p_memsz; // Num. of bytes in mem image of segment (may be zero) - Elf_Word p_align; // Segment alignment constraint + Elf_Xword p_filesz; // Num. of bytes in file image of segment (may be zero) + Elf_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero) + Elf_Xword p_align; // Segment alignment constraint }; -template +template class ELFObjectFile : public ObjectFile { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + LLVM_ELF_IMPORT_TYPES(ELFT) - typedef Elf_Ehdr_Impl Elf_Ehdr; - typedef Elf_Shdr_Impl Elf_Shdr; - typedef Elf_Sym_Impl Elf_Sym; - typedef Elf_Dyn_Impl Elf_Dyn; - typedef Elf_Rel_Impl Elf_Rel; - typedef Elf_Rel_Impl Elf_Rela; - typedef Elf_Verdef_Impl Elf_Verdef; - typedef Elf_Verdaux_Impl Elf_Verdaux; - typedef Elf_Verneed_Impl Elf_Verneed; - typedef Elf_Vernaux_Impl Elf_Vernaux; - typedef Elf_Versym_Impl Elf_Versym; - typedef DynRefImpl DynRef; - typedef content_iterator dyn_iterator; +public: + /// \brief Iterate over constant sized entities. 
+ template + class ELFEntityIterator { + public: + typedef ptrdiff_t difference_type; + typedef EntT value_type; + typedef std::random_access_iterator_tag iterator_category; + typedef value_type &reference; + typedef value_type *pointer; + + /// \brief Default construct iterator. + ELFEntityIterator() : EntitySize(0), Current(0) {} + ELFEntityIterator(uint64_t EntSize, const char *Start) + : EntitySize(EntSize) + , Current(Start) {} + + reference operator *() { + assert(Current && "Attempted to dereference an invalid iterator!"); + return *reinterpret_cast(Current); + } + + pointer operator ->() { + assert(Current && "Attempted to dereference an invalid iterator!"); + return reinterpret_cast(Current); + } + + bool operator ==(const ELFEntityIterator &Other) { + return Current == Other.Current; + } + + bool operator !=(const ELFEntityIterator &Other) { + return !(*this == Other); + } + + ELFEntityIterator &operator ++() { + assert(Current && "Attempted to increment an invalid iterator!"); + Current += EntitySize; + return *this; + } + + ELFEntityIterator operator ++(int) { + ELFEntityIterator Tmp = *this; + ++*this; + return Tmp; + } + + ELFEntityIterator &operator =(const ELFEntityIterator &Other) { + EntitySize = Other.EntitySize; + Current = Other.Current; + return *this; + } + + difference_type operator -(const ELFEntityIterator &Other) const { + assert(EntitySize == Other.EntitySize && + "Subtracting iterators of different EntitiySize!"); + return (Current - Other.Current) / EntitySize; + } + + const char *get() const { return Current; } + + private: + uint64_t EntitySize; + const char *Current; + }; + + typedef Elf_Ehdr_Impl Elf_Ehdr; + typedef Elf_Shdr_Impl Elf_Shdr; + typedef Elf_Sym_Impl Elf_Sym; + typedef Elf_Dyn_Impl Elf_Dyn; + typedef Elf_Phdr_Impl Elf_Phdr; + typedef Elf_Rel_Impl Elf_Rel; + typedef Elf_Rel_Impl Elf_Rela; + typedef Elf_Verdef_Impl Elf_Verdef; + typedef Elf_Verdaux_Impl Elf_Verdaux; + typedef Elf_Verneed_Impl Elf_Verneed; + typedef 
Elf_Vernaux_Impl Elf_Vernaux; + typedef Elf_Versym_Impl Elf_Versym; + typedef ELFEntityIterator Elf_Dyn_iterator; + typedef ELFEntityIterator Elf_Sym_iterator; + typedef ELFEntityIterator Elf_Rela_Iter; + typedef ELFEntityIterator Elf_Rel_Iter; protected: // This flag is used for classof, to distinguish ELFObjectFile from @@ -466,7 +607,7 @@ protected: bool isDyldELFObject; private: - typedef SmallVector Sections_t; + typedef SmallVector Sections_t; typedef DenseMap IndexMap_t; typedef DenseMap > RelocMap_t; @@ -491,58 +632,6 @@ private: // This is set the first time getLoadName is called. mutable const char *dt_soname; -public: - /// \brief Iterate over relocations in a .rel or .rela section. - template - class ELFRelocationIterator { - public: - typedef void difference_type; - typedef const RelocT value_type; - typedef std::forward_iterator_tag iterator_category; - typedef value_type &reference; - typedef value_type *pointer; - - /// \brief Default construct iterator. - ELFRelocationIterator() : Section(0), Current(0) {} - ELFRelocationIterator(const Elf_Shdr *Sec, const char *Start) - : Section(Sec) - , Current(Start) {} - - reference operator *() { - assert(Current && "Attempted to dereference an invalid iterator!"); - return *reinterpret_cast(Current); - } - - pointer operator ->() { - assert(Current && "Attempted to dereference an invalid iterator!"); - return reinterpret_cast(Current); - } - - bool operator ==(const ELFRelocationIterator &Other) { - return Section == Other.Section && Current == Other.Current; - } - - bool operator !=(const ELFRelocationIterator &Other) { - return !(*this == Other); - } - - ELFRelocationIterator &operator ++(int) { - assert(Current && "Attempted to increment an invalid iterator!"); - Current += Section->sh_entsize; - return *this; - } - - ELFRelocationIterator operator ++() { - ELFRelocationIterator Tmp = *this; - ++*this; - return Tmp; - } - - private: - const Elf_Shdr *Section; - const char *Current; - }; - private: // 
Records for each version index the corresponding Verdef or Vernaux entry. // This is filled the first time LoadVersionMap() is called. @@ -579,6 +668,7 @@ private: return getSection(Rel.w.b); } +public: bool isRelocationHasAddend(DataRefImpl Rel) const; template const T *getEntry(uint16_t Section, uint32_t Entry) const; @@ -609,6 +699,7 @@ public: const Elf_Dyn *getDyn(DataRefImpl DynData) const; error_code getSymbolVersion(SymbolRef Symb, StringRef &Version, bool &IsDefault) const; + uint64_t getSymbolIndex(const Elf_Sym *sym) const; protected: virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; @@ -622,9 +713,6 @@ protected: section_iterator &Res) const; virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const; - friend class DynRefImpl; - virtual error_code getDynNext(DataRefImpl DynData, DynRef &Result) const; - virtual error_code getLibraryNext(DataRefImpl Data, LibraryRef &Result) const; virtual error_code getLibraryPath(DataRefImpl Data, StringRef &Res) const; @@ -666,6 +754,13 @@ protected: public: ELFObjectFile(MemoryBuffer *Object, error_code &ec); + + bool isMips64EL() const { + return Header->e_machine == ELF::EM_MIPS && + Header->getFileClass() == ELF::ELFCLASS64 && + Header->getDataEncoding() == ELF::ELFDATA2LSB; + } + virtual symbol_iterator begin_symbols() const; virtual symbol_iterator end_symbols() const; @@ -678,30 +773,70 @@ public: virtual library_iterator begin_libraries_needed() const; virtual library_iterator end_libraries_needed() const; - virtual dyn_iterator begin_dynamic_table() const; - virtual dyn_iterator end_dynamic_table() const; - - typedef ELFRelocationIterator Elf_Rela_Iter; - typedef ELFRelocationIterator Elf_Rel_Iter; - - virtual Elf_Rela_Iter beginELFRela(const Elf_Shdr *sec) const { - return Elf_Rela_Iter(sec, (const char *)(base() + sec->sh_offset)); + const Elf_Shdr *getDynamicSymbolTableSectionHeader() const { + 
return SymbolTableSections[0]; } - virtual Elf_Rela_Iter endELFRela(const Elf_Shdr *sec) const { - return Elf_Rela_Iter(sec, (const char *) + const Elf_Shdr *getDynamicStringTableSectionHeader() const { + return dot_dynstr_sec; + } + + Elf_Dyn_iterator begin_dynamic_table() const; + /// \param NULLEnd use one past the first DT_NULL entry as the end instead of + /// the section size. + Elf_Dyn_iterator end_dynamic_table(bool NULLEnd = false) const; + + Elf_Sym_iterator begin_elf_dynamic_symbols() const { + const Elf_Shdr *DynSymtab = SymbolTableSections[0]; + if (DynSymtab) + return Elf_Sym_iterator(DynSymtab->sh_entsize, + (const char *)base() + DynSymtab->sh_offset); + return Elf_Sym_iterator(0, 0); + } + + Elf_Sym_iterator end_elf_dynamic_symbols() const { + const Elf_Shdr *DynSymtab = SymbolTableSections[0]; + if (DynSymtab) + return Elf_Sym_iterator(DynSymtab->sh_entsize, (const char *)base() + + DynSymtab->sh_offset + DynSymtab->sh_size); + return Elf_Sym_iterator(0, 0); + } + + Elf_Rela_Iter beginELFRela(const Elf_Shdr *sec) const { + return Elf_Rela_Iter(sec->sh_entsize, + (const char *)(base() + sec->sh_offset)); + } + + Elf_Rela_Iter endELFRela(const Elf_Shdr *sec) const { + return Elf_Rela_Iter(sec->sh_entsize, (const char *) (base() + sec->sh_offset + sec->sh_size)); } - virtual Elf_Rel_Iter beginELFRel(const Elf_Shdr *sec) const { - return Elf_Rel_Iter(sec, (const char *)(base() + sec->sh_offset)); + Elf_Rel_Iter beginELFRel(const Elf_Shdr *sec) const { + return Elf_Rel_Iter(sec->sh_entsize, + (const char *)(base() + sec->sh_offset)); } - virtual Elf_Rel_Iter endELFRel(const Elf_Shdr *sec) const { - return Elf_Rel_Iter(sec, (const char *) + Elf_Rel_Iter endELFRel(const Elf_Shdr *sec) const { + return Elf_Rel_Iter(sec->sh_entsize, (const char *) (base() + sec->sh_offset + sec->sh_size)); } + /// \brief Iterate over program header table. 
+ typedef ELFEntityIterator Elf_Phdr_Iter; + + Elf_Phdr_Iter begin_program_headers() const { + return Elf_Phdr_Iter(Header->e_phentsize, + (const char*)base() + Header->e_phoff); + } + + Elf_Phdr_Iter end_program_headers() const { + return Elf_Phdr_Iter(Header->e_phentsize, + (const char*)base() + + Header->e_phoff + + (Header->e_phnum * Header->e_phentsize)); + } + virtual uint8_t getBytesInAddress() const; virtual StringRef getFileFormatName() const; virtual StringRef getObjectType() const { return "ELF"; } @@ -713,6 +848,7 @@ public: uint64_t getNumSections() const; uint64_t getStringTableIndex() const; ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const; + const Elf_Ehdr *getElfHeader() const; const Elf_Shdr *getSection(const Elf_Sym *symb) const; const Elf_Shdr *getElfSection(section_iterator &It) const; const Elf_Sym *getElfSymbol(symbol_iterator &It) const; @@ -721,16 +857,15 @@ public: // Methods for type inquiry through isa, cast, and dyn_cast bool isDyldType() const { return isDyldELFObject; } static inline bool classof(const Binary *v) { - return v->getType() == getELFType(target_endianness == support::little, - is64Bits); + return v->getType() == getELFType(ELFT::TargetEndianness == support::little, + ELFT::Is64Bits); } }; // Iterate through the version definitions, and place each Elf_Verdef // in the VersionMap according to its index. -template -void ELFObjectFile:: - LoadVersionDefs(const Elf_Shdr *sec) const { +template +void ELFObjectFile::LoadVersionDefs(const Elf_Shdr *sec) const { unsigned vd_size = sec->sh_size; // Size of section in bytes unsigned vd_count = sec->sh_info; // Number of Verdef entries const char *sec_start = (const char*)base() + sec->sh_offset; @@ -754,9 +889,8 @@ void ELFObjectFile:: // Iterate through the versions needed section, and place each Elf_Vernaux // in the VersionMap according to its index. 
-template -void ELFObjectFile:: - LoadVersionNeeds(const Elf_Shdr *sec) const { +template +void ELFObjectFile::LoadVersionNeeds(const Elf_Shdr *sec) const { unsigned vn_size = sec->sh_size; // Size of section in bytes unsigned vn_count = sec->sh_info; // Number of Verneed entries const char *sec_start = (const char*)base() + sec->sh_offset; @@ -787,8 +921,8 @@ void ELFObjectFile:: } } -template -void ELFObjectFile::LoadVersionMap() const { +template +void ELFObjectFile::LoadVersionMap() const { // If there is no dynamic symtab or version table, there is nothing to do. if (SymbolTableSections[0] == NULL || dot_gnu_version_sec == NULL) return; @@ -809,9 +943,9 @@ void ELFObjectFile::LoadVersionMap() const { LoadVersionNeeds(dot_gnu_version_r_sec); } -template -void ELFObjectFile - ::validateSymbol(DataRefImpl Symb) const { +template +void ELFObjectFile::validateSymbol(DataRefImpl Symb) const { +#ifndef NDEBUG const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; // FIXME: We really need to do proper error handling in the case of an invalid @@ -826,12 +960,12 @@ void ELFObjectFile + SymbolTableSection->sh_size))) // FIXME: Proper error handling. 
report_fatal_error("Symb must point to a valid symbol!"); +#endif } -template -error_code ELFObjectFile - ::getSymbolNext(DataRefImpl Symb, - SymbolRef &Result) const { +template +error_code ELFObjectFile::getSymbolNext(DataRefImpl Symb, + SymbolRef &Result) const { validateSymbol(Symb); const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; @@ -856,20 +990,18 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSymbolName(DataRefImpl Symb, - StringRef &Result) const { +template +error_code ELFObjectFile::getSymbolName(DataRefImpl Symb, + StringRef &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); return getSymbolName(SymbolTableSections[Symb.d.b], symb, Result); } -template -error_code ELFObjectFile - ::getSymbolVersion(SymbolRef SymRef, - StringRef &Version, - bool &IsDefault) const { +template +error_code ELFObjectFile::getSymbolVersion(SymbolRef SymRef, + StringRef &Version, + bool &IsDefault) const { DataRefImpl Symb = SymRef.getRawDataRefImpl(); validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); @@ -877,18 +1009,17 @@ error_code ELFObjectFile Version, IsDefault); } -template -ELF::Elf64_Word ELFObjectFile - ::getSymbolTableIndex(const Elf_Sym *symb) const { +template +ELF::Elf64_Word ELFObjectFile + ::getSymbolTableIndex(const Elf_Sym *symb) const { if (symb->st_shndx == ELF::SHN_XINDEX) return ExtendedSymbolTable.lookup(symb); return symb->st_shndx; } -template -const typename ELFObjectFile::Elf_Shdr * -ELFObjectFile - ::getSection(const Elf_Sym *symb) const { +template +const typename ELFObjectFile::Elf_Shdr * +ELFObjectFile::getSection(const Elf_Sym *symb) const { if (symb->st_shndx == ELF::SHN_XINDEX) return getSection(ExtendedSymbolTable.lookup(symb)); if (symb->st_shndx >= ELF::SHN_LORESERVE) @@ -896,35 +1027,37 @@ ELFObjectFile return getSection(symb->st_shndx); } -template -const typename ELFObjectFile::Elf_Shdr * -ELFObjectFile - 
::getElfSection(section_iterator &It) const { +template +const typename ELFObjectFile::Elf_Ehdr * +ELFObjectFile::getElfHeader() const { + return Header; +} + +template +const typename ELFObjectFile::Elf_Shdr * +ELFObjectFile::getElfSection(section_iterator &It) const { llvm::object::DataRefImpl ShdrRef = It->getRawDataRefImpl(); return reinterpret_cast(ShdrRef.p); } -template -const typename ELFObjectFile::Elf_Sym * -ELFObjectFile - ::getElfSymbol(symbol_iterator &It) const { +template +const typename ELFObjectFile::Elf_Sym * +ELFObjectFile::getElfSymbol(symbol_iterator &It) const { return getSymbol(It->getRawDataRefImpl()); } -template -const typename ELFObjectFile::Elf_Sym * -ELFObjectFile - ::getElfSymbol(uint32_t index) const { +template +const typename ELFObjectFile::Elf_Sym * +ELFObjectFile::getElfSymbol(uint32_t index) const { DataRefImpl SymbolData; SymbolData.d.a = index; SymbolData.d.b = 1; return getSymbol(SymbolData); } -template -error_code ELFObjectFile - ::getSymbolFileOffset(DataRefImpl Symb, - uint64_t &Result) const { +template +error_code ELFObjectFile::getSymbolFileOffset(DataRefImpl Symb, + uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section; @@ -942,7 +1075,7 @@ error_code ELFObjectFile switch (symb->getType()) { case ELF::STT_SECTION: - Result = Section ? Section->sh_addr : UnknownAddressOrSize; + Result = Section ? 
Section->sh_offset : UnknownAddressOrSize; return object_error::success; case ELF::STT_FUNC: case ELF::STT_OBJECT: @@ -956,10 +1089,9 @@ error_code ELFObjectFile } } -template -error_code ELFObjectFile - ::getSymbolAddress(DataRefImpl Symb, - uint64_t &Result) const { +template +error_code ELFObjectFile::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section; @@ -991,6 +1123,11 @@ error_code ELFObjectFile IsRelocatable = true; } Result = symb->st_value; + + // Clear the ARM/Thumb indicator flag. + if (Header->e_machine == ELF::EM_ARM) + Result &= ~1; + if (IsRelocatable && Section != 0) Result += Section->sh_addr; return object_error::success; @@ -1000,10 +1137,9 @@ error_code ELFObjectFile } } -template -error_code ELFObjectFile - ::getSymbolSize(DataRefImpl Symb, - uint64_t &Result) const { +template +error_code ELFObjectFile::getSymbolSize(DataRefImpl Symb, + uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); if (symb->st_size == 0) @@ -1012,10 +1148,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSymbolNMTypeChar(DataRefImpl Symb, - char &Result) const { +template +error_code ELFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, + char &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section = getSection(symb); @@ -1077,10 +1212,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSymbolType(DataRefImpl Symb, - SymbolRef::Type &Result) const { +template +error_code ELFObjectFile::getSymbolType(DataRefImpl Symb, + SymbolRef::Type &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); @@ -1109,10 +1243,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSymbolFlags(DataRefImpl Symb, - uint32_t &Result) const { 
+template +error_code ELFObjectFile::getSymbolFlags(DataRefImpl Symb, + uint32_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); @@ -1144,10 +1277,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const { +template +error_code ELFObjectFile::getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *sec = getSection(symb); @@ -1161,19 +1293,18 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSymbolValue(DataRefImpl Symb, - uint64_t &Val) const { +template +error_code ELFObjectFile::getSymbolValue(DataRefImpl Symb, + uint64_t &Val) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); Val = symb->st_value; return object_error::success; } -template -error_code ELFObjectFile - ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const { +template +error_code ELFObjectFile::getSectionNext(DataRefImpl Sec, + SectionRef &Result) const { const uint8_t *sec = reinterpret_cast(Sec.p); sec += Header->e_shentsize; Sec.p = reinterpret_cast(sec); @@ -1181,65 +1312,58 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSectionName(DataRefImpl Sec, - StringRef &Result) const { +template +error_code ELFObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name)); return object_error::success; } -template -error_code ELFObjectFile - ::getSectionAddress(DataRefImpl Sec, - uint64_t &Result) const { +template +error_code ELFObjectFile::getSectionAddress(DataRefImpl Sec, + uint64_t &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); Result = sec->sh_addr; return object_error::success; } -template -error_code 
ELFObjectFile - ::getSectionSize(DataRefImpl Sec, - uint64_t &Result) const { +template +error_code ELFObjectFile::getSectionSize(DataRefImpl Sec, + uint64_t &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); Result = sec->sh_size; return object_error::success; } -template -error_code ELFObjectFile - ::getSectionContents(DataRefImpl Sec, - StringRef &Result) const { +template +error_code ELFObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); const char *start = (const char*)base() + sec->sh_offset; Result = StringRef(start, sec->sh_size); return object_error::success; } -template -error_code ELFObjectFile - ::getSectionContents(const Elf_Shdr *Sec, - StringRef &Result) const { +template +error_code ELFObjectFile::getSectionContents(const Elf_Shdr *Sec, + StringRef &Result) const { const char *start = (const char*)base() + Sec->sh_offset; Result = StringRef(start, Sec->sh_size); return object_error::success; } -template -error_code ELFObjectFile - ::getSectionAlignment(DataRefImpl Sec, - uint64_t &Result) const { +template +error_code ELFObjectFile::getSectionAlignment(DataRefImpl Sec, + uint64_t &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); Result = sec->sh_addralign; return object_error::success; } -template -error_code ELFObjectFile - ::isSectionText(DataRefImpl Sec, - bool &Result) const { +template +error_code ELFObjectFile::isSectionText(DataRefImpl Sec, + bool &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); if (sec->sh_flags & ELF::SHF_EXECINSTR) Result = true; @@ -1248,10 +1372,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::isSectionData(DataRefImpl Sec, - bool &Result) const { +template +error_code ELFObjectFile::isSectionData(DataRefImpl Sec, + bool &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) && sec->sh_type == 
ELF::SHT_PROGBITS) @@ -1261,10 +1384,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::isSectionBSS(DataRefImpl Sec, - bool &Result) const { +template +error_code ELFObjectFile::isSectionBSS(DataRefImpl Sec, + bool &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) && sec->sh_type == ELF::SHT_NOBITS) @@ -1274,10 +1396,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::isSectionRequiredForExecution(DataRefImpl Sec, - bool &Result) const { +template +error_code ELFObjectFile::isSectionRequiredForExecution( + DataRefImpl Sec, bool &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); if (sec->sh_flags & ELF::SHF_ALLOC) Result = true; @@ -1286,10 +1407,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::isSectionVirtual(DataRefImpl Sec, - bool &Result) const { +template +error_code ELFObjectFile::isSectionVirtual(DataRefImpl Sec, + bool &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); if (sec->sh_type == ELF::SHT_NOBITS) Result = true; @@ -1298,24 +1418,19 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::isSectionZeroInit(DataRefImpl Sec, - bool &Result) const { +template +error_code ELFObjectFile::isSectionZeroInit(DataRefImpl Sec, + bool &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); // For ELF, all zero-init sections are virtual (that is, they occupy no space // in the object image) and vice versa. 
- if (sec->sh_flags & ELF::SHT_NOBITS) - Result = true; - else - Result = false; + Result = sec->sh_type == ELF::SHT_NOBITS; return object_error::success; } -template -error_code ELFObjectFile - ::isSectionReadOnlyData(DataRefImpl Sec, - bool &Result) const { +template +error_code ELFObjectFile::isSectionReadOnlyData(DataRefImpl Sec, + bool &Result) const { const Elf_Shdr *sec = reinterpret_cast(Sec.p); if (sec->sh_flags & ELF::SHF_WRITE || sec->sh_flags & ELF::SHF_EXECINSTR) Result = false; @@ -1324,19 +1439,26 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::sectionContainsSymbol(DataRefImpl Sec, - DataRefImpl Symb, - bool &Result) const { - // FIXME: Unimplemented. - Result = false; +template +error_code ELFObjectFile::sectionContainsSymbol(DataRefImpl Sec, + DataRefImpl Symb, + bool &Result) const { + validateSymbol(Symb); + + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + const Elf_Sym *symb = getSymbol(Symb); + + unsigned shndx = symb->st_shndx; + bool Reserved = shndx >= ELF::SHN_LORESERVE + && shndx <= ELF::SHN_HIRESERVE; + + Result = !Reserved && (sec == getSection(symb->st_shndx)); return object_error::success; } -template -relocation_iterator ELFObjectFile - ::getSectionRelBegin(DataRefImpl Sec) const { +template +relocation_iterator +ELFObjectFile::getSectionRelBegin(DataRefImpl Sec) const { DataRefImpl RelData; const Elf_Shdr *sec = reinterpret_cast(Sec.p); typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec); @@ -1348,9 +1470,9 @@ relocation_iterator ELFObjectFile return relocation_iterator(RelocationRef(RelData, this)); } -template -relocation_iterator ELFObjectFile - ::getSectionRelEnd(DataRefImpl Sec) const { +template +relocation_iterator +ELFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { DataRefImpl RelData; const Elf_Shdr *sec = reinterpret_cast(Sec.p); typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec); @@ -1366,10 +1488,9 @@ relocation_iterator 
ELFObjectFile } // Relocations -template -error_code ELFObjectFile - ::getRelocationNext(DataRefImpl Rel, - RelocationRef &Result) const { +template +error_code ELFObjectFile::getRelocationNext(DataRefImpl Rel, + RelocationRef &Result) const { ++Rel.w.c; const Elf_Shdr *relocsec = getSection(Rel.w.b); if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) { @@ -1395,21 +1516,20 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getRelocationSymbol(DataRefImpl Rel, - SymbolRef &Result) const { +template +error_code ELFObjectFile::getRelocationSymbol(DataRefImpl Rel, + SymbolRef &Result) const { uint32_t symbolIdx; const Elf_Shdr *sec = getSection(Rel.w.b); switch (sec->sh_type) { default : report_fatal_error("Invalid section type in Rel!"); case ELF::SHT_REL : { - symbolIdx = getRel(Rel)->getSymbol(); + symbolIdx = getRel(Rel)->getSymbol(isMips64EL()); break; } case ELF::SHT_RELA : { - symbolIdx = getRela(Rel)->getSymbol(); + symbolIdx = getRela(Rel)->getSymbol(isMips64EL()); break; } } @@ -1423,10 +1543,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getRelocationAddress(DataRefImpl Rel, - uint64_t &Result) const { +template +error_code ELFObjectFile::getRelocationAddress(DataRefImpl Rel, + uint64_t &Result) const { uint64_t offset; const Elf_Shdr *sec = getSection(Rel.w.b); switch (sec->sh_type) { @@ -1446,10 +1565,9 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getRelocationOffset(DataRefImpl Rel, - uint64_t &Result) const { +template +error_code ELFObjectFile::getRelocationOffset(DataRefImpl Rel, + uint64_t &Result) const { uint64_t offset; const Elf_Shdr *sec = getSection(Rel.w.b); switch (sec->sh_type) { @@ -1469,20 +1587,19 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getRelocationType(DataRefImpl Rel, - uint64_t &Result) const { +template 
+error_code ELFObjectFile::getRelocationType(DataRefImpl Rel, + uint64_t &Result) const { const Elf_Shdr *sec = getSection(Rel.w.b); switch (sec->sh_type) { default : report_fatal_error("Invalid section type in Rel!"); case ELF::SHT_REL : { - Result = getRel(Rel)->getType(); + Result = getRel(Rel)->getType(isMips64EL()); break; } case ELF::SHT_RELA : { - Result = getRela(Rel)->getType(); + Result = getRela(Rel)->getType(isMips64EL()); break; } } @@ -1492,22 +1609,21 @@ error_code ELFObjectFile #define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \ case ELF::enum: res = #enum; break; -template -error_code ELFObjectFile - ::getRelocationTypeName(DataRefImpl Rel, - SmallVectorImpl &Result) const { +template +error_code ELFObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl &Result) const { const Elf_Shdr *sec = getSection(Rel.w.b); - uint8_t type; + uint32_t type; StringRef res; switch (sec->sh_type) { default : return object_error::parse_failed; case ELF::SHT_REL : { - type = getRel(Rel)->getType(); + type = getRel(Rel)->getType(isMips64EL()); break; } case ELF::SHT_RELA : { - type = getRela(Rel)->getType(); + type = getRela(Rel)->getType(isMips64EL()); break; } } @@ -1596,6 +1712,143 @@ error_code ELFObjectFile res = "Unknown"; } break; + case ELF::EM_MIPS: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_26); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LITERAL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT5); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT6); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_DISP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_OFST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SUB); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_A); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_B); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_DELETE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHER); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHEST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SCN_DISP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_ADD_IMMEDIATE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PJUMP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_RELGOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JALR); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_LDM); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GOTTPREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT); + default: + res = "Unknown"; + } + break; + case 
ELF::EM_AARCH64: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G3); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD_PREL_LO19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_LO21); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_PG_HI21); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADD_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST8_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TSTBR14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CONDBR19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP26); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CALL26); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST16_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_HI12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_HI12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADR_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL); + + default: + res = "Unknown"; + } + break; case ELF::EM_ARM: switch (type) { LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE); @@ -1834,10 +2087,9 @@ error_code ELFObjectFile #undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME -template -error_code ELFObjectFile - ::getRelocationAdditionalInfo(DataRefImpl Rel, - int64_t &Result) const { +template +error_code ELFObjectFile::getRelocationAdditionalInfo( + DataRefImpl Rel, int64_t &Result) const { const Elf_Shdr *sec = getSection(Rel.w.b); switch (sec->sh_type) { default : @@ -1853,10 +2105,9 @@ error_code ELFObjectFile } } -template -error_code ELFObjectFile - ::getRelocationValueString(DataRefImpl Rel, - SmallVectorImpl &Result) const { +template +error_code ELFObjectFile::getRelocationValueString( + DataRefImpl Rel, SmallVectorImpl &Result) const { const Elf_Shdr *sec = getSection(Rel.w.b); uint8_t type; StringRef res; @@ -1866,14 +2117,14 @@ error_code ELFObjectFile default: return object_error::parse_failed; case ELF::SHT_REL: { - type = getRel(Rel)->getType(); - symbol_index = getRel(Rel)->getSymbol(); + type = getRel(Rel)->getType(isMips64EL()); + symbol_index = getRel(Rel)->getSymbol(isMips64EL()); // TODO: Read 
implicit addend from section data. break; } case ELF::SHT_RELA: { - type = getRela(Rel)->getType(); - symbol_index = getRela(Rel)->getSymbol(); + type = getRela(Rel)->getType(isMips64EL()); + symbol_index = getRela(Rel)->getSymbol(isMips64EL()); addend = getRela(Rel)->r_addend; break; } @@ -1911,6 +2162,7 @@ error_code ELFObjectFile res = "Unknown"; } break; + case ELF::EM_AARCH64: case ELF::EM_ARM: case ELF::EM_HEXAGON: res = symname; @@ -1924,20 +2176,21 @@ error_code ELFObjectFile } // Verify that the last byte in the string table in a null. -template -void ELFObjectFile - ::VerifyStrTab(const Elf_Shdr *sh) const { +template +void ELFObjectFile::VerifyStrTab(const Elf_Shdr *sh) const { const char *strtab = (const char*)base() + sh->sh_offset; if (strtab[sh->sh_size - 1] != 0) // FIXME: Proper error handling. report_fatal_error("String table must end with a null terminator!"); } -template -ELFObjectFile::ELFObjectFile(MemoryBuffer *Object - , error_code &ec) - : ObjectFile(getELFType(target_endianness == support::little, is64Bits), - Object, ec) +template +ELFObjectFile::ELFObjectFile(MemoryBuffer *Object, error_code &ec) + : ObjectFile(getELFType( + static_cast(ELFT::TargetEndianness) == support::little, + ELFT::Is64Bits), + Object, + ec) , isDyldELFObject(false) , SectionHeaderTable(0) , dot_shstrtab_sec(0) @@ -2094,9 +2347,22 @@ ELFObjectFile::ELFObjectFile(MemoryBuffer *Object } } -template -symbol_iterator ELFObjectFile - ::begin_symbols() const { +// Get the symbol table index in the symtab section given a symbol +template +uint64_t ELFObjectFile::getSymbolIndex(const Elf_Sym *Sym) const { + assert(SymbolTableSections.size() == 1 && "Only one symbol table supported!"); + const Elf_Shdr *SymTab = *SymbolTableSections.begin(); + uintptr_t SymLoc = uintptr_t(Sym); + uintptr_t SymTabLoc = uintptr_t(base() + SymTab->sh_offset); + assert(SymLoc > SymTabLoc && "Symbol not in symbol table!"); + uint64_t SymOffset = SymLoc - SymTabLoc; + assert(SymOffset % 
SymTab->sh_entsize == 0 && + "Symbol not multiple of symbol size!"); + return SymOffset / SymTab->sh_entsize; +} + +template +symbol_iterator ELFObjectFile::begin_symbols() const { DataRefImpl SymbolData; if (SymbolTableSections.size() <= 1) { SymbolData.d.a = std::numeric_limits::max(); @@ -2108,18 +2374,16 @@ symbol_iterator ELFObjectFile return symbol_iterator(SymbolRef(SymbolData, this)); } -template -symbol_iterator ELFObjectFile - ::end_symbols() const { +template +symbol_iterator ELFObjectFile::end_symbols() const { DataRefImpl SymbolData; SymbolData.d.a = std::numeric_limits::max(); SymbolData.d.b = std::numeric_limits::max(); return symbol_iterator(SymbolRef(SymbolData, this)); } -template -symbol_iterator ELFObjectFile - ::begin_dynamic_symbols() const { +template +symbol_iterator ELFObjectFile::begin_dynamic_symbols() const { DataRefImpl SymbolData; if (SymbolTableSections[0] == NULL) { SymbolData.d.a = std::numeric_limits::max(); @@ -2131,26 +2395,23 @@ symbol_iterator ELFObjectFile return symbol_iterator(SymbolRef(SymbolData, this)); } -template -symbol_iterator ELFObjectFile - ::end_dynamic_symbols() const { +template +symbol_iterator ELFObjectFile::end_dynamic_symbols() const { DataRefImpl SymbolData; SymbolData.d.a = std::numeric_limits::max(); SymbolData.d.b = std::numeric_limits::max(); return symbol_iterator(SymbolRef(SymbolData, this)); } -template -section_iterator ELFObjectFile - ::begin_sections() const { +template +section_iterator ELFObjectFile::begin_sections() const { DataRefImpl ret; ret.p = reinterpret_cast(base() + Header->e_shoff); return section_iterator(SectionRef(ret, this)); } -template -section_iterator ELFObjectFile - ::end_sections() const { +template +section_iterator ELFObjectFile::end_sections() const { DataRefImpl ret; ret.p = reinterpret_cast(base() + Header->e_shoff @@ -2158,58 +2419,47 @@ section_iterator ELFObjectFile return section_iterator(SectionRef(ret, this)); } -template -typename ELFObjectFile::dyn_iterator 
-ELFObjectFile::begin_dynamic_table() const { - DataRefImpl DynData; - if (dot_dynamic_sec == NULL || dot_dynamic_sec->sh_size == 0) { - DynData.d.a = std::numeric_limits::max(); - } else { - DynData.d.a = 0; +template +typename ELFObjectFile::Elf_Dyn_iterator +ELFObjectFile::begin_dynamic_table() const { + if (dot_dynamic_sec) + return Elf_Dyn_iterator(dot_dynamic_sec->sh_entsize, + (const char *)base() + dot_dynamic_sec->sh_offset); + return Elf_Dyn_iterator(0, 0); +} + +template +typename ELFObjectFile::Elf_Dyn_iterator +ELFObjectFile::end_dynamic_table(bool NULLEnd) const { + if (dot_dynamic_sec) { + Elf_Dyn_iterator Ret(dot_dynamic_sec->sh_entsize, + (const char *)base() + dot_dynamic_sec->sh_offset + + dot_dynamic_sec->sh_size); + + if (NULLEnd) { + Elf_Dyn_iterator Start = begin_dynamic_table(); + while (Start != Ret && Start->getTag() != ELF::DT_NULL) + ++Start; + + // Include the DT_NULL. + if (Start != Ret) + ++Start; + Ret = Start; + } + return Ret; } - return dyn_iterator(DynRef(DynData, this)); + return Elf_Dyn_iterator(0, 0); } -template -typename ELFObjectFile::dyn_iterator -ELFObjectFile - ::end_dynamic_table() const { - DataRefImpl DynData; - DynData.d.a = std::numeric_limits::max(); - return dyn_iterator(DynRef(DynData, this)); -} - -template -error_code ELFObjectFile - ::getDynNext(DataRefImpl DynData, - DynRef &Result) const { - ++DynData.d.a; - - // Check to see if we are at the end of .dynamic - if (DynData.d.a >= dot_dynamic_sec->getEntityCount()) { - // We are at the end. Return the terminator. 
- DynData.d.a = std::numeric_limits::max(); - } - - Result = DynRef(DynData, this); - return object_error::success; -} - -template -StringRef -ELFObjectFile::getLoadName() const { +template +StringRef ELFObjectFile::getLoadName() const { if (!dt_soname) { // Find the DT_SONAME entry - dyn_iterator it = begin_dynamic_table(); - dyn_iterator ie = end_dynamic_table(); - error_code ec; - while (it != ie) { - if (it->getTag() == ELF::DT_SONAME) - break; - it.increment(ec); - if (ec) - report_fatal_error("dynamic table iteration failed"); - } + Elf_Dyn_iterator it = begin_dynamic_table(); + Elf_Dyn_iterator ie = end_dynamic_table(); + while (it != ie && it->getTag() != ELF::DT_SONAME) + ++it; + if (it != ie) { if (dot_dynstr_sec == NULL) report_fatal_error("Dynamic string table is missing"); @@ -2221,57 +2471,43 @@ ELFObjectFile::getLoadName() const { return dt_soname; } -template -library_iterator ELFObjectFile - ::begin_libraries_needed() const { +template +library_iterator ELFObjectFile::begin_libraries_needed() const { // Find the first DT_NEEDED entry - dyn_iterator i = begin_dynamic_table(); - dyn_iterator e = end_dynamic_table(); - error_code ec; - while (i != e) { - if (i->getTag() == ELF::DT_NEEDED) - break; - i.increment(ec); - if (ec) - report_fatal_error("dynamic table iteration failed"); - } - // Use the same DataRefImpl format as DynRef. - return library_iterator(LibraryRef(i->getRawDataRefImpl(), this)); + Elf_Dyn_iterator i = begin_dynamic_table(); + Elf_Dyn_iterator e = end_dynamic_table(); + while (i != e && i->getTag() != ELF::DT_NEEDED) + ++i; + + DataRefImpl DRI; + DRI.p = reinterpret_cast(i.get()); + return library_iterator(LibraryRef(DRI, this)); } -template -error_code ELFObjectFile - ::getLibraryNext(DataRefImpl Data, - LibraryRef &Result) const { +template +error_code ELFObjectFile::getLibraryNext(DataRefImpl Data, + LibraryRef &Result) const { // Use the same DataRefImpl format as DynRef. 
- dyn_iterator i = dyn_iterator(DynRef(Data, this)); - dyn_iterator e = end_dynamic_table(); + Elf_Dyn_iterator i = Elf_Dyn_iterator(dot_dynamic_sec->sh_entsize, + reinterpret_cast(Data.p)); + Elf_Dyn_iterator e = end_dynamic_table(); - // Skip the current dynamic table entry. - error_code ec; - if (i != e) { - i.increment(ec); - // TODO: proper error handling - if (ec) - report_fatal_error("dynamic table iteration failed"); - } + // Skip the current dynamic table entry and find the next DT_NEEDED entry. + do + ++i; + while (i != e && i->getTag() != ELF::DT_NEEDED); - // Find the next DT_NEEDED entry. - while (i != e) { - if (i->getTag() == ELF::DT_NEEDED) - break; - i.increment(ec); - if (ec) - report_fatal_error("dynamic table iteration failed"); - } - Result = LibraryRef(i->getRawDataRefImpl(), this); + DataRefImpl DRI; + DRI.p = reinterpret_cast(i.get()); + Result = LibraryRef(DRI, this); return object_error::success; } -template -error_code ELFObjectFile - ::getLibraryPath(DataRefImpl Data, StringRef &Res) const { - dyn_iterator i = dyn_iterator(DynRef(Data, this)); +template +error_code ELFObjectFile::getLibraryPath(DataRefImpl Data, + StringRef &Res) const { + Elf_Dyn_iterator i = Elf_Dyn_iterator(dot_dynamic_sec->sh_entsize, + reinterpret_cast(Data.p)); if (i == end_dynamic_table()) report_fatal_error("getLibraryPath() called on iterator end"); @@ -2289,22 +2525,21 @@ error_code ELFObjectFile return object_error::success; } -template -library_iterator ELFObjectFile - ::end_libraries_needed() const { - dyn_iterator e = end_dynamic_table(); - // Use the same DataRefImpl format as DynRef. - return library_iterator(LibraryRef(e->getRawDataRefImpl(), this)); +template +library_iterator ELFObjectFile::end_libraries_needed() const { + Elf_Dyn_iterator e = end_dynamic_table(); + DataRefImpl DRI; + DRI.p = reinterpret_cast(e.get()); + return library_iterator(LibraryRef(DRI, this)); } -template -uint8_t ELFObjectFile::getBytesInAddress() const { - return is64Bits ? 
8 : 4; +template +uint8_t ELFObjectFile::getBytesInAddress() const { + return ELFT::Is64Bits ? 8 : 4; } -template -StringRef ELFObjectFile - ::getFileFormatName() const { +template +StringRef ELFObjectFile::getFileFormatName() const { switch(Header->e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: switch(Header->e_machine) { @@ -2316,6 +2551,8 @@ StringRef ELFObjectFile return "ELF32-arm"; case ELF::EM_HEXAGON: return "ELF32-hexagon"; + case ELF::EM_MIPS: + return "ELF32-mips"; default: return "ELF32-unknown"; } @@ -2325,6 +2562,8 @@ StringRef ELFObjectFile return "ELF64-i386"; case ELF::EM_X86_64: return "ELF64-x86-64"; + case ELF::EM_AARCH64: + return "ELF64-aarch64"; case ELF::EM_PPC64: return "ELF64-ppc64"; default: @@ -2336,19 +2575,21 @@ StringRef ELFObjectFile } } -template -unsigned ELFObjectFile::getArch() const { +template +unsigned ELFObjectFile::getArch() const { switch(Header->e_machine) { case ELF::EM_386: return Triple::x86; case ELF::EM_X86_64: return Triple::x86_64; + case ELF::EM_AARCH64: + return Triple::aarch64; case ELF::EM_ARM: return Triple::arm; case ELF::EM_HEXAGON: return Triple::hexagon; case ELF::EM_MIPS: - return (target_endianness == support::little) ? + return (ELFT::TargetEndianness == support::little) ? 
Triple::mipsel : Triple::mips; case ELF::EM_PPC64: return Triple::ppc64; @@ -2357,8 +2598,8 @@ unsigned ELFObjectFile::getArch() const { } } -template -uint64_t ELFObjectFile::getNumSections() const { +template +uint64_t ELFObjectFile::getNumSections() const { assert(Header && "Header not initialized!"); if (Header->e_shnum == ELF::SHN_UNDEF) { assert(SectionHeaderTable && "SectionHeaderTable not initialized!"); @@ -2367,9 +2608,9 @@ uint64_t ELFObjectFile::getNumSections() const { return Header->e_shnum; } -template +template uint64_t -ELFObjectFile::getStringTableIndex() const { +ELFObjectFile::getStringTableIndex() const { if (Header->e_shnum == ELF::SHN_UNDEF) { if (Header->e_shstrndx == ELF::SHN_HIRESERVE) return SectionHeaderTable->sh_link; @@ -2379,53 +2620,44 @@ ELFObjectFile::getStringTableIndex() const { return Header->e_shstrndx; } - -template +template template inline const T * -ELFObjectFile::getEntry(uint16_t Section, - uint32_t Entry) const { +ELFObjectFile::getEntry(uint16_t Section, uint32_t Entry) const { return getEntry(getSection(Section), Entry); } -template +template template inline const T * -ELFObjectFile::getEntry(const Elf_Shdr * Section, - uint32_t Entry) const { +ELFObjectFile::getEntry(const Elf_Shdr * Section, uint32_t Entry) const { return reinterpret_cast( base() + Section->sh_offset + (Entry * Section->sh_entsize)); } -template -const typename ELFObjectFile::Elf_Sym * -ELFObjectFile::getSymbol(DataRefImpl Symb) const { +template +const typename ELFObjectFile::Elf_Sym * +ELFObjectFile::getSymbol(DataRefImpl Symb) const { return getEntry(SymbolTableSections[Symb.d.b], Symb.d.a); } -template -const typename ELFObjectFile::Elf_Dyn * -ELFObjectFile::getDyn(DataRefImpl DynData) const { - return getEntry(dot_dynamic_sec, DynData.d.a); -} - -template -const typename ELFObjectFile::Elf_Rel * -ELFObjectFile::getRel(DataRefImpl Rel) const { +template +const typename ELFObjectFile::Elf_Rel * +ELFObjectFile::getRel(DataRefImpl Rel) const { 
return getEntry(Rel.w.b, Rel.w.c); } -template -const typename ELFObjectFile::Elf_Rela * -ELFObjectFile::getRela(DataRefImpl Rela) const { +template +const typename ELFObjectFile::Elf_Rela * +ELFObjectFile::getRela(DataRefImpl Rela) const { return getEntry(Rela.w.b, Rela.w.c); } -template -const typename ELFObjectFile::Elf_Shdr * -ELFObjectFile::getSection(DataRefImpl Symb) const { +template +const typename ELFObjectFile::Elf_Shdr * +ELFObjectFile::getSection(DataRefImpl Symb) const { const Elf_Shdr *sec = getSection(Symb.d.b); if (sec->sh_type != ELF::SHT_SYMTAB || sec->sh_type != ELF::SHT_DYNSYM) // FIXME: Proper error handling. @@ -2433,9 +2665,9 @@ ELFObjectFile::getSection(DataRefImpl Symb) const { return sec; } -template -const typename ELFObjectFile::Elf_Shdr * -ELFObjectFile::getSection(uint32_t index) const { +template +const typename ELFObjectFile::Elf_Shdr * +ELFObjectFile::getSection(uint32_t index) const { if (index == 0) return 0; if (!SectionHeaderTable || index >= getNumSections()) @@ -2447,17 +2679,15 @@ ELFObjectFile::getSection(uint32_t index) const { + (index * Header->e_shentsize)); } -template -const char *ELFObjectFile - ::getString(uint32_t section, - ELF::Elf32_Word offset) const { +template +const char *ELFObjectFile::getString(uint32_t section, + ELF::Elf32_Word offset) const { return getString(getSection(section), offset); } -template -const char *ELFObjectFile - ::getString(const Elf_Shdr *section, - ELF::Elf32_Word offset) const { +template +const char *ELFObjectFile::getString(const Elf_Shdr *section, + ELF::Elf32_Word offset) const { assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!"); if (offset >= section->sh_size) // FIXME: Proper error handling. 
@@ -2465,11 +2695,10 @@ const char *ELFObjectFile return (const char *)base() + section->sh_offset + offset; } -template -error_code ELFObjectFile - ::getSymbolName(const Elf_Shdr *section, - const Elf_Sym *symb, - StringRef &Result) const { +template +error_code ELFObjectFile::getSymbolName(const Elf_Shdr *section, + const Elf_Sym *symb, + StringRef &Result) const { if (symb->st_name == 0) { const Elf_Shdr *section = getSection(symb); if (!section) @@ -2489,20 +2718,18 @@ error_code ELFObjectFile return object_error::success; } -template -error_code ELFObjectFile - ::getSectionName(const Elf_Shdr *section, - StringRef &Result) const { +template +error_code ELFObjectFile::getSectionName(const Elf_Shdr *section, + StringRef &Result) const { Result = StringRef(getString(dot_shstrtab_sec, section->sh_name)); return object_error::success; } -template -error_code ELFObjectFile - ::getSymbolVersion(const Elf_Shdr *section, - const Elf_Sym *symb, - StringRef &Version, - bool &IsDefault) const { +template +error_code ELFObjectFile::getSymbolVersion(const Elf_Shdr *section, + const Elf_Sym *symb, + StringRef &Version, + bool &IsDefault) const { // Handle non-dynamic symbols. 
if (section != SymbolTableSections[0]) { // Non-dynamic symbols can have versions in their names @@ -2580,54 +2807,6 @@ error_code ELFObjectFile return object_error::success; } -template -inline DynRefImpl - ::DynRefImpl(DataRefImpl DynP, const OwningType *Owner) - : DynPimpl(DynP) - , OwningObject(Owner) {} - -template -inline bool DynRefImpl - ::operator==(const DynRefImpl &Other) const { - return DynPimpl == Other.DynPimpl; -} - -template -inline bool DynRefImpl - ::operator <(const DynRefImpl &Other) const { - return DynPimpl < Other.DynPimpl; -} - -template -inline error_code DynRefImpl - ::getNext(DynRefImpl &Result) const { - return OwningObject->getDynNext(DynPimpl, Result); -} - -template -inline int64_t DynRefImpl - ::getTag() const { - return OwningObject->getDyn(DynPimpl)->d_tag; -} - -template -inline uint64_t DynRefImpl - ::getVal() const { - return OwningObject->getDyn(DynPimpl)->d_un.d_val; -} - -template -inline uint64_t DynRefImpl - ::getPtr() const { - return OwningObject->getDyn(DynPimpl)->d_un.d_ptr; -} - -template -inline DataRefImpl DynRefImpl - ::getRawDataRefImpl() const { - return DynPimpl; -} - /// This is a generic interface for retrieving GNU symbol version /// information from an ELFObjectFile. 
static inline error_code GetELFSymbolVersion(const ObjectFile *Obj, @@ -2635,28 +2814,43 @@ static inline error_code GetELFSymbolVersion(const ObjectFile *Obj, StringRef &Version, bool &IsDefault) { // Little-endian 32-bit - if (const ELFObjectFile *ELFObj = - dyn_cast >(Obj)) + if (const ELFObjectFile > *ELFObj = + dyn_cast > >(Obj)) return ELFObj->getSymbolVersion(Sym, Version, IsDefault); // Big-endian 32-bit - if (const ELFObjectFile *ELFObj = - dyn_cast >(Obj)) + if (const ELFObjectFile > *ELFObj = + dyn_cast > >(Obj)) return ELFObj->getSymbolVersion(Sym, Version, IsDefault); // Little-endian 64-bit - if (const ELFObjectFile *ELFObj = - dyn_cast >(Obj)) + if (const ELFObjectFile > *ELFObj = + dyn_cast > >(Obj)) return ELFObj->getSymbolVersion(Sym, Version, IsDefault); // Big-endian 64-bit - if (const ELFObjectFile *ELFObj = - dyn_cast >(Obj)) + if (const ELFObjectFile > *ELFObj = + dyn_cast > >(Obj)) return ELFObj->getSymbolVersion(Sym, Version, IsDefault); llvm_unreachable("Object passed to GetELFSymbolVersion() is not ELF"); } +/// This function returns the hash value for a symbol in the .dynsym section +/// Name of the API remains consistent as specified in the libelf +/// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash +static inline unsigned elf_hash(StringRef &symbolName) { + unsigned h = 0, g; + for (unsigned i = 0, j = symbolName.size(); i < j; i++) { + h = (h << 4) + symbolName[i]; + g = h & 0xf0000000L; + if (g != 0) + h ^= g >> 24; + h &= ~g; + } + return h; +} + } } diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index 4e03daab16a3..ed7aabd2c868 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -15,11 +15,11 @@ #ifndef LLVM_OBJECT_MACHO_H #define LLVM_OBJECT_MACHO_H -#include "llvm/Object/ObjectFile.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Object/MachOObject.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/MachO.h" #include 
"llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallVector.h" namespace llvm { namespace object { @@ -44,7 +44,12 @@ public: virtual unsigned getArch() const; virtual StringRef getLoadName() const; - MachOObject *getObject() { return MachOObj; } + // In a MachO file, sections have a segment name. This is used in the .o + // files. They have a single segment, but this field specifies which segment + // a section should be put in in the final object. + error_code getSectionFinalSegmentName(DataRefImpl Sec, StringRef &Res) const; + + MachOObject *getObject() { return MachOObj.get(); } static inline bool classof(const Binary *v) { return v->isMachO(); @@ -104,7 +109,7 @@ protected: virtual error_code getLibraryPath(DataRefImpl LibData, StringRef &Res) const; private: - MachOObject *MachOObj; + OwningPtr MachOObj; mutable uint32_t RegisteredStringTable; typedef SmallVector SectionList; SectionList Sections; diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h index c0f700d3c870..ffca391ea228 100644 --- a/include/llvm/Object/MachOFormat.h +++ b/include/llvm/Object/MachOFormat.h @@ -64,7 +64,10 @@ namespace mach { CSARM_V7 = 9, CSARM_V7F = 10, CSARM_V7S = 11, - CSARM_V7K = 12 + CSARM_V7K = 12, + CSARM_V6M = 14, + CSARM_V7M = 15, + CSARM_V7EM = 16 }; /// \brief PowerPC Machine Subtypes. @@ -145,7 +148,8 @@ namespace macho { LCT_CodeSignature = 0x1d, LCT_SegmentSplitInfo = 0x1e, LCT_FunctionStarts = 0x26, - LCT_DataInCode = 0x29 + LCT_DataInCode = 0x29, + LCT_LinkerOptions = 0x2D }; /// \brief Load command structure. @@ -233,10 +237,22 @@ namespace macho { uint32_t DataSize; }; + struct LinkerOptionsLoadCommand { + uint32_t Type; + uint32_t Size; + uint32_t Count; + // Load command is followed by Count number of zero-terminated UTF8 strings, + // and then zero-filled to be 4-byte aligned. 
+ }; + /// @} /// @name Section Data /// @{ + enum SectionFlags { + SF_PureInstructions = 0x80000000 + }; + struct Section { char Name[16]; char SegmentName[16]; diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h index 86f150a2940f..9e4ab199f572 100644 --- a/include/llvm/Object/MachOObject.h +++ b/include/llvm/Object/MachOObject.h @@ -10,11 +10,11 @@ #ifndef LLVM_OBJECT_MACHOOBJECT_H #define LLVM_OBJECT_MACHOOBJECT_H -#include #include "llvm/ADT/InMemoryStruct.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/MachOFormat.h" +#include namespace llvm { @@ -153,6 +153,9 @@ public: void ReadLinkeditDataLoadCommand( const LoadCommandInfo &LCI, InMemoryStruct &Res) const; + void ReadLinkerOptionsLoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct &Res) const; void ReadIndirectSymbolTableEntry( const macho::DysymtabLoadCommand &DLC, unsigned Index, diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 1a3120ab8ba3..6a66653fe223 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_OBJECT_OBJECT_FILE_H -#define LLVM_OBJECT_OBJECT_FILE_H +#ifndef LLVM_OBJECT_OBJECTFILE_H +#define LLVM_OBJECT_OBJECTFILE_H -#include "llvm/Object/Binary.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/Binary.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h index 7668bdedb7bb..2dcbdf905327 100644 --- a/include/llvm/Object/RelocVisitor.h +++ b/include/llvm/Object/RelocVisitor.h @@ -13,14 +13,14 @@ // //===----------------------------------------------------------------------===// -#ifndef _LLVM_OBJECT_RELOCVISITOR -#define _LLVM_OBJECT_RELOCVISITOR +#ifndef 
LLVM_OBJECT_RELOCVISITOR_H +#define LLVM_OBJECT_RELOCVISITOR_H -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Object/ELF.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace object { @@ -40,7 +40,7 @@ struct RelocToApply { /// @brief Base class for object file relocation visitors. class RelocVisitor { public: - explicit RelocVisitor(llvm::StringRef FileFormat) + explicit RelocVisitor(StringRef FileFormat) : FileFormat(FileFormat), HasError(false) {} // TODO: Should handle multiple applied relocations via either passing in the @@ -64,35 +64,77 @@ public: HasError = true; return RelocToApply(); } + } else if (FileFormat == "ELF32-i386") { + switch (RelocType) { + case llvm::ELF::R_386_NONE: + return visitELF_386_NONE(R); + case llvm::ELF::R_386_32: + return visitELF_386_32(R, Value); + case llvm::ELF::R_386_PC32: + return visitELF_386_PC32(R, Value, SecAddr); + default: + HasError = true; + return RelocToApply(); + } + } else if (FileFormat == "ELF64-ppc64") { + switch (RelocType) { + case llvm::ELF::R_PPC64_ADDR32: + return visitELF_PPC64_ADDR32(R, Value); + default: + HasError = true; + return RelocToApply(); + } + } else if (FileFormat == "ELF32-mips") { + switch (RelocType) { + case llvm::ELF::R_MIPS_32: + return visitELF_MIPS_32(R, Value); + default: + HasError = true; + return RelocToApply(); + } + } else if (FileFormat == "ELF64-aarch64") { + switch (RelocType) { + case llvm::ELF::R_AARCH64_ABS32: + return visitELF_AARCH64_ABS32(R, Value); + case llvm::ELF::R_AARCH64_ABS64: + return visitELF_AARCH64_ABS64(R, Value); + default: + HasError = true; + return RelocToApply(); + } } + HasError = true; return RelocToApply(); } bool error() { return HasError; } private: - llvm::StringRef FileFormat; + StringRef FileFormat; bool HasError; /// 
Operations - // Width is the width in bytes of the extend. - RelocToApply zeroExtend(RelocToApply r, char Width) { - if (Width == r.Width) - return r; - r.Value &= (1 << ((Width * 8))) - 1; - return r; + /// 386-ELF + RelocToApply visitELF_386_NONE(RelocationRef R) { + return RelocToApply(0, 0); } - RelocToApply signExtend(RelocToApply r, char Width) { - if (Width == r.Width) - return r; - bool SignBit = r.Value & (1 << ((Width * 8) - 1)); - if (SignBit) { - r.Value |= ~((1 << (Width * 8)) - 1); - } else { - r.Value &= (1 << (Width * 8)) - 1; - } - return r; + + // Ideally the Addend here will be the addend in the data for + // the relocation. It's not actually the case for Rel relocations. + RelocToApply visitELF_386_32(RelocationRef R, uint64_t Value) { + int64_t Addend; + R.getAdditionalInfo(Addend); + return RelocToApply(Value + Addend, 4); + } + + RelocToApply visitELF_386_PC32(RelocationRef R, uint64_t Value, + uint64_t SecAddr) { + int64_t Addend; + R.getAdditionalInfo(Addend); + uint64_t Address; + R.getAddress(Address); + return RelocToApply(Value + Addend - Address, 4); } /// X86-64 ELF @@ -124,6 +166,42 @@ private: int32_t Res = (Value + Addend) & 0xFFFFFFFF; return RelocToApply(Res, 4); } + + /// PPC64 ELF + RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) { + int64_t Addend; + R.getAdditionalInfo(Addend); + uint32_t Res = (Value + Addend) & 0xFFFFFFFF; + return RelocToApply(Res, 4); + } + + /// MIPS ELF + RelocToApply visitELF_MIPS_32(RelocationRef R, uint64_t Value) { + int64_t Addend; + R.getAdditionalInfo(Addend); + uint32_t Res = (Value + Addend) & 0xFFFFFFFF; + return RelocToApply(Res, 4); + } + + // AArch64 ELF + RelocToApply visitELF_AARCH64_ABS32(RelocationRef R, uint64_t Value) { + int64_t Addend; + R.getAdditionalInfo(Addend); + int64_t Res = Value + Addend; + + // Overflow check allows for both signed and unsigned interpretation. 
+ if (Res < INT32_MIN || Res > UINT32_MAX) + HasError = true; + + return RelocToApply(static_cast(Res), 4); + } + + RelocToApply visitELF_AARCH64_ABS64(RelocationRef R, uint64_t Value) { + int64_t Addend; + R.getAdditionalInfo(Addend); + return RelocToApply(Value + Addend, 8); + } + }; } diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h new file mode 100644 index 000000000000..6b8ed3f7d2b1 --- /dev/null +++ b/include/llvm/Option/Arg.h @@ -0,0 +1,132 @@ +//===--- Arg.h - Parsed Argument Classes ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines the llvm::Arg class for parsed arguments. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPTION_ARG_H +#define LLVM_OPTION_ARG_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/Option.h" +#include + +namespace llvm { +namespace opt { +class ArgList; + +/// \brief A concrete instance of a particular driver option. +/// +/// The Arg class encodes just enough information to be able to +/// derive the argument values efficiently. In addition, Arg +/// instances have an intrusive double linked list which is used by +/// ArgList to provide efficient iteration over all instances of a +/// particular option. +class Arg { + Arg(const Arg &) LLVM_DELETED_FUNCTION; + void operator=(const Arg &) LLVM_DELETED_FUNCTION; + +private: + /// \brief The option this argument is an instance of. + const Option Opt; + + /// \brief The argument this argument was derived from (during tool chain + /// argument translation), if any. + const Arg *BaseArg; + + /// \brief How this instance of the option was spelled. 
+ StringRef Spelling; + + /// \brief The index at which this argument appears in the containing + /// ArgList. + unsigned Index; + + /// \brief Was this argument used to effect compilation? + /// + /// This is used for generating "argument unused" diagnostics. + mutable unsigned Claimed : 1; + + /// \brief Does this argument own its values? + mutable unsigned OwnsValues : 1; + + /// \brief The argument values, as C strings. + SmallVector Values; + +public: + Arg(const Option Opt, StringRef Spelling, unsigned Index, + const Arg *BaseArg = 0); + Arg(const Option Opt, StringRef Spelling, unsigned Index, + const char *Value0, const Arg *BaseArg = 0); + Arg(const Option Opt, StringRef Spelling, unsigned Index, + const char *Value0, const char *Value1, const Arg *BaseArg = 0); + ~Arg(); + + const Option getOption() const { return Opt; } + StringRef getSpelling() const { return Spelling; } + unsigned getIndex() const { return Index; } + + /// \brief Return the base argument which generated this arg. + /// + /// This is either the argument itself or the argument it was + /// derived from during tool chain specific argument translation. + const Arg &getBaseArg() const { + return BaseArg ? *BaseArg : *this; + } + void setBaseArg(const Arg *_BaseArg) { + BaseArg = _BaseArg; + } + + bool getOwnsValues() const { return OwnsValues; } + void setOwnsValues(bool Value) const { OwnsValues = Value; } + + bool isClaimed() const { return getBaseArg().Claimed; } + + /// \brief Set the Arg claimed bit. + void claim() const { getBaseArg().Claimed = true; } + + unsigned getNumValues() const { return Values.size(); } + const char *getValue(unsigned N = 0) const { + return Values[N]; + } + + SmallVectorImpl &getValues() { + return Values; + } + + bool containsValue(StringRef Value) const { + for (unsigned i = 0, e = getNumValues(); i != e; ++i) + if (Values[i] == Value) + return true; + return false; + } + + /// \brief Append the argument onto the given array as strings. 
+ void render(const ArgList &Args, ArgStringList &Output) const; + + /// \brief Append the argument, render as an input, onto the given + /// array as strings. + /// + /// The distinction is that some options only render their values + /// when rendered as a input (e.g., Xlinker). + void renderAsInput(const ArgList &Args, ArgStringList &Output) const; + + void dump() const; + + /// \brief Return a formatted version of the argument and + /// its values, for debugging and diagnostics. + std::string getAsString(const ArgList &Args) const; +}; + +} // end namespace opt +} // end namespace llvm + +#endif diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h new file mode 100644 index 000000000000..d3accfe7f1e0 --- /dev/null +++ b/include/llvm/Option/ArgList.h @@ -0,0 +1,414 @@ +//===--- ArgList.h - Argument List Management -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPTION_ARGLIST_H +#define LLVM_OPTION_ARGLIST_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/OptSpecifier.h" +#include "llvm/Option/Option.h" +#include +#include +#include + +namespace llvm { +namespace opt { +class Arg; +class ArgList; +class Option; + +/// arg_iterator - Iterates through arguments stored inside an ArgList. +class arg_iterator { + /// The current argument. + SmallVectorImpl::const_iterator Current; + + /// The argument list we are iterating over. + const ArgList &Args; + + /// Optional filters on the arguments which will be match. Most clients + /// should never want to iterate over arguments without filters, so we won't + /// bother to factor this into two separate iterator implementations. 
+ // + // FIXME: Make efficient; the idea is to provide efficient iteration over + // all arguments which match a particular id and then just provide an + // iterator combinator which takes multiple iterators which can be + // efficiently compared and returns them in order. + OptSpecifier Id0, Id1, Id2; + + void SkipToNextArg(); + +public: + typedef Arg * const * value_type; + typedef Arg * const & reference; + typedef Arg * const * pointer; + typedef std::forward_iterator_tag iterator_category; + typedef std::ptrdiff_t difference_type; + + arg_iterator(SmallVectorImpl::const_iterator it, + const ArgList &_Args, OptSpecifier _Id0 = 0U, + OptSpecifier _Id1 = 0U, OptSpecifier _Id2 = 0U) + : Current(it), Args(_Args), Id0(_Id0), Id1(_Id1), Id2(_Id2) { + SkipToNextArg(); + } + + operator const Arg*() { return *Current; } + reference operator*() const { return *Current; } + pointer operator->() const { return Current; } + + arg_iterator &operator++() { + ++Current; + SkipToNextArg(); + return *this; + } + + arg_iterator operator++(int) { + arg_iterator tmp(*this); + ++(*this); + return tmp; + } + + friend bool operator==(arg_iterator LHS, arg_iterator RHS) { + return LHS.Current == RHS.Current; + } + friend bool operator!=(arg_iterator LHS, arg_iterator RHS) { + return !(LHS == RHS); + } +}; + +/// ArgList - Ordered collection of driver arguments. +/// +/// The ArgList class manages a list of Arg instances as well as +/// auxiliary data and convenience methods to allow Tools to quickly +/// check for the presence of Arg instances for a particular Option +/// and to iterate over groups of arguments. 
+class ArgList { +private: + ArgList(const ArgList &) LLVM_DELETED_FUNCTION; + void operator=(const ArgList &) LLVM_DELETED_FUNCTION; + +public: + typedef SmallVector arglist_type; + typedef arglist_type::iterator iterator; + typedef arglist_type::const_iterator const_iterator; + typedef arglist_type::reverse_iterator reverse_iterator; + typedef arglist_type::const_reverse_iterator const_reverse_iterator; + +private: + /// The internal list of arguments. + arglist_type Args; + +protected: + ArgList(); + +public: + virtual ~ArgList(); + + /// @name Arg Access + /// @{ + + /// append - Append \p A to the arg list. + void append(Arg *A); + + arglist_type &getArgs() { return Args; } + const arglist_type &getArgs() const { return Args; } + + unsigned size() const { return Args.size(); } + + /// @} + /// @name Arg Iteration + /// @{ + + iterator begin() { return Args.begin(); } + iterator end() { return Args.end(); } + + reverse_iterator rbegin() { return Args.rbegin(); } + reverse_iterator rend() { return Args.rend(); } + + const_iterator begin() const { return Args.begin(); } + const_iterator end() const { return Args.end(); } + + const_reverse_iterator rbegin() const { return Args.rbegin(); } + const_reverse_iterator rend() const { return Args.rend(); } + + arg_iterator filtered_begin(OptSpecifier Id0 = 0U, OptSpecifier Id1 = 0U, + OptSpecifier Id2 = 0U) const { + return arg_iterator(Args.begin(), *this, Id0, Id1, Id2); + } + arg_iterator filtered_end() const { + return arg_iterator(Args.end(), *this); + } + + /// @} + /// @name Arg Removal + /// @{ + + /// eraseArg - Remove any option matching \p Id. + void eraseArg(OptSpecifier Id); + + /// @} + /// @name Arg Access + /// @{ + + /// hasArg - Does the arg list contain any option matching \p Id. + /// + /// \p Claim Whether the argument should be claimed, if it exists. 
+ bool hasArgNoClaim(OptSpecifier Id) const { + return getLastArgNoClaim(Id) != 0; + } + bool hasArg(OptSpecifier Id) const { + return getLastArg(Id) != 0; + } + bool hasArg(OptSpecifier Id0, OptSpecifier Id1) const { + return getLastArg(Id0, Id1) != 0; + } + bool hasArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { + return getLastArg(Id0, Id1, Id2) != 0; + } + + /// getLastArg - Return the last argument matching \p Id, or null. + /// + /// \p Claim Whether the argument should be claimed, if it exists. + Arg *getLastArgNoClaim(OptSpecifier Id) const; + Arg *getLastArg(OptSpecifier Id) const; + Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1) const; + Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const; + Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, + OptSpecifier Id3) const; + Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, + OptSpecifier Id3, OptSpecifier Id4) const; + Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, + OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5) const; + Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, + OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5, + OptSpecifier Id6) const; + Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, + OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5, + OptSpecifier Id6, OptSpecifier Id7) const; + + /// getArgString - Return the input argument string at \p Index. + virtual const char *getArgString(unsigned Index) const = 0; + + /// getNumInputArgStrings - Return the number of original argument strings, + /// which are guaranteed to be the first strings in the argument string + /// list. + virtual unsigned getNumInputArgStrings() const = 0; + + /// @} + /// @name Argument Lookup Utilities + /// @{ + + /// getLastArgValue - Return the value of the last argument, or a default. 
+ StringRef getLastArgValue(OptSpecifier Id, + StringRef Default = "") const; + + /// getAllArgValues - Get the values of all instances of the given argument + /// as strings. + std::vector getAllArgValues(OptSpecifier Id) const; + + /// @} + /// @name Translation Utilities + /// @{ + + /// hasFlag - Given an option \p Pos and its negative form \p Neg, return + /// true if the option is present, false if the negation is present, and + /// \p Default if neither option is given. If both the option and its + /// negation are present, the last one wins. + bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default=true) const; + + /// AddLastArg - Render only the last argument match \p Id0, if present. + void AddLastArg(ArgStringList &Output, OptSpecifier Id0) const; + + /// AddAllArgs - Render all arguments matching the given ids. + void AddAllArgs(ArgStringList &Output, OptSpecifier Id0, + OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const; + + /// AddAllArgValues - Render the argument values of all arguments + /// matching the given ids. + void AddAllArgValues(ArgStringList &Output, OptSpecifier Id0, + OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const; + + /// AddAllArgsTranslated - Render all the arguments matching the + /// given ids, but forced to separate args and using the provided + /// name instead of the first option value. + /// + /// \param Joined - If true, render the argument as joined with + /// the option specifier. + void AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0, + const char *Translation, + bool Joined = false) const; + + /// ClaimAllArgs - Claim all arguments which match the given + /// option id. + void ClaimAllArgs(OptSpecifier Id0) const; + + /// ClaimAllArgs - Claim all arguments. + /// + void ClaimAllArgs() const; + + /// @} + /// @name Arg Synthesis + /// @{ + + /// MakeArgString - Construct a constant string pointer whose + /// lifetime will match that of the ArgList. 
+ virtual const char *MakeArgString(StringRef Str) const = 0; + const char *MakeArgString(const char *Str) const { + return MakeArgString(StringRef(Str)); + } + const char *MakeArgString(std::string Str) const { + return MakeArgString(StringRef(Str)); + } + const char *MakeArgString(const Twine &Str) const; + + /// \brief Create an arg string for (\p LHS + \p RHS), reusing the + /// string at \p Index if possible. + const char *GetOrMakeJoinedArgString(unsigned Index, StringRef LHS, + StringRef RHS) const; + + /// @} +}; + +class InputArgList : public ArgList { +private: + /// List of argument strings used by the contained Args. + /// + /// This is mutable since we treat the ArgList as being the list + /// of Args, and allow routines to add new strings (to have a + /// convenient place to store the memory) via MakeIndex. + mutable ArgStringList ArgStrings; + + /// Strings for synthesized arguments. + /// + /// This is mutable since we treat the ArgList as being the list + /// of Args, and allow routines to add new strings (to have a + /// convenient place to store the memory) via MakeIndex. + mutable std::list SynthesizedStrings; + + /// The number of original input argument strings. + unsigned NumInputArgStrings; + +public: + InputArgList(const char* const *ArgBegin, const char* const *ArgEnd); + ~InputArgList(); + + virtual const char *getArgString(unsigned Index) const { + return ArgStrings[Index]; + } + + virtual unsigned getNumInputArgStrings() const { + return NumInputArgStrings; + } + + /// @name Arg Synthesis + /// @{ + +public: + /// MakeIndex - Get an index for the given string(s). + unsigned MakeIndex(StringRef String0) const; + unsigned MakeIndex(StringRef String0, StringRef String1) const; + + virtual const char *MakeArgString(StringRef Str) const; + + /// @} +}; + +/// DerivedArgList - An ordered collection of driver arguments, +/// whose storage may be in another argument list. 
+class DerivedArgList : public ArgList { + const InputArgList &BaseArgs; + + /// The list of arguments we synthesized. + mutable arglist_type SynthesizedArgs; + +public: + /// Construct a new derived arg list from \p BaseArgs. + DerivedArgList(const InputArgList &BaseArgs); + ~DerivedArgList(); + + virtual const char *getArgString(unsigned Index) const { + return BaseArgs.getArgString(Index); + } + + virtual unsigned getNumInputArgStrings() const { + return BaseArgs.getNumInputArgStrings(); + } + + const InputArgList &getBaseArgs() const { + return BaseArgs; + } + + /// @name Arg Synthesis + /// @{ + + /// AddSynthesizedArg - Add a argument to the list of synthesized arguments + /// (to be freed). + void AddSynthesizedArg(Arg *A) { + SynthesizedArgs.push_back(A); + } + + virtual const char *MakeArgString(StringRef Str) const; + + /// AddFlagArg - Construct a new FlagArg for the given option \p Id and + /// append it to the argument list. + void AddFlagArg(const Arg *BaseArg, const Option Opt) { + append(MakeFlagArg(BaseArg, Opt)); + } + + /// AddPositionalArg - Construct a new Positional arg for the given option + /// \p Id, with the provided \p Value and append it to the argument + /// list. + void AddPositionalArg(const Arg *BaseArg, const Option Opt, + StringRef Value) { + append(MakePositionalArg(BaseArg, Opt, Value)); + } + + + /// AddSeparateArg - Construct a new Positional arg for the given option + /// \p Id, with the provided \p Value and append it to the argument + /// list. + void AddSeparateArg(const Arg *BaseArg, const Option Opt, + StringRef Value) { + append(MakeSeparateArg(BaseArg, Opt, Value)); + } + + + /// AddJoinedArg - Construct a new Positional arg for the given option + /// \p Id, with the provided \p Value and append it to the argument list. 
+ void AddJoinedArg(const Arg *BaseArg, const Option Opt, + StringRef Value) { + append(MakeJoinedArg(BaseArg, Opt, Value)); + } + + + /// MakeFlagArg - Construct a new FlagArg for the given option \p Id. + Arg *MakeFlagArg(const Arg *BaseArg, const Option Opt) const; + + /// MakePositionalArg - Construct a new Positional arg for the + /// given option \p Id, with the provided \p Value. + Arg *MakePositionalArg(const Arg *BaseArg, const Option Opt, + StringRef Value) const; + + /// MakeSeparateArg - Construct a new Positional arg for the + /// given option \p Id, with the provided \p Value. + Arg *MakeSeparateArg(const Arg *BaseArg, const Option Opt, + StringRef Value) const; + + /// MakeJoinedArg - Construct a new Positional arg for the + /// given option \p Id, with the provided \p Value. + Arg *MakeJoinedArg(const Arg *BaseArg, const Option Opt, + StringRef Value) const; + + /// @} +}; + +} // end namespace opt +} // end namespace llvm + +#endif diff --git a/include/llvm/Option/OptParser.td b/include/llvm/Option/OptParser.td new file mode 100644 index 000000000000..e781fa02d75b --- /dev/null +++ b/include/llvm/Option/OptParser.td @@ -0,0 +1,127 @@ +//===--- OptParser.td - Common Option Parsing Interfaces ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common interfaces used by the option parsing TableGen +// backend. +// +//===----------------------------------------------------------------------===// + +// Define the kinds of options. + +class OptionKind { + string Name = name; + // The kind precedence, kinds with lower precedence are matched first. + int Precedence = predecence; + // Indicate a sentinel option. + bit Sentinel = sentinel; +} + +// An option group. 
+def KIND_GROUP : OptionKind<"Group">; +// The input option kind. +def KIND_INPUT : OptionKind<"Input", 1, 1>; +// The unknown option kind. +def KIND_UNKNOWN : OptionKind<"Unknown", 2, 1>; +// A flag with no values. +def KIND_FLAG : OptionKind<"Flag">; +// An option which prefixes its (single) value. +def KIND_JOINED : OptionKind<"Joined", 1>; +// An option which is followed by its value. +def KIND_SEPARATE : OptionKind<"Separate">; +// An option followed by its values, which are separated by commas. +def KIND_COMMAJOINED : OptionKind<"CommaJoined">; +// An option which is which takes multiple (separate) arguments. +def KIND_MULTIARG : OptionKind<"MultiArg">; +// An option which is either joined to its (non-empty) value, or followed by its +// value. +def KIND_JOINED_OR_SEPARATE : OptionKind<"JoinedOrSeparate">; +// An option which is both joined to its (first) value, and followed by its +// (second) value. +def KIND_JOINED_AND_SEPARATE : OptionKind<"JoinedAndSeparate">; + +// Define the option flags. + +class OptionFlag {} + +// HelpHidden - The option should not be displayed in --help, even if it has +// help text. Clients *can* use this in conjunction with the OptTable::PrintHelp +// arguments to implement hidden help groups. +def HelpHidden : OptionFlag; + +// RenderAsInput - The option should not render the name when rendered as an +// input (i.e., the option is rendered as values). +def RenderAsInput : OptionFlag; + +// RenderJoined - The option should be rendered joined, even if separate (only +// sensible on single value separate options). +def RenderJoined : OptionFlag; + +// RenderSeparate - The option should be rendered separately, even if joined +// (only sensible on joined options). +def RenderSeparate : OptionFlag; + +// Define the option group class. + +class OptionGroup { + string EnumName = ?; // Uses the def name if undefined. + string Name = name; + string HelpText = ?; + OptionGroup Group = ?; +} + +// Define the option class. 
+ +class Option prefixes, string name, OptionKind kind> { + string EnumName = ?; // Uses the def name if undefined. + list Prefixes = prefixes; + string Name = name; + OptionKind Kind = kind; + // Used by MultiArg option kind. + int NumArgs = 0; + string HelpText = ?; + string MetaVarName = ?; + list Flags = []; + OptionGroup Group = ?; + Option Alias = ?; +} + +// Helpers for defining options. + +class Flag prefixes, string name> + : Option; +class Joined prefixes, string name> + : Option; +class Separate prefixes, string name> + : Option; +class CommaJoined prefixes, string name> + : Option; +class MultiArg prefixes, string name, int numargs> + : Option { + int NumArgs = numargs; +} +class JoinedOrSeparate prefixes, string name> + : Option; +class JoinedAndSeparate prefixes, string name> + : Option; + +// Mix-ins for adding optional attributes. + +class Alias